In [None]:
# coordinate system of Spain field data: EPSG: 3042

# info from: https://gis.stackexchange.com/questions/362582/coordinate-system-mismatch-in-folium 
# all coordinates passed to Leaflet functions/methods are always EPSG:4326.
# [latitude, longitude]

# when defining/creating map with folium.Map call, actual crs of map has to be specified. Map is in EPSG:3857

# transforming coordinates with gdal:
# https://gdal.org/programs/gdaltransform.html




In [1]:

import sys

# Google Earth Engine setup.
# `ee` is always imported so that the name is bound in this notebook even when
# the module was already loaded elsewhere (a cached import costs nothing).
# Initialisation is only performed the first time the module is loaded, to
# avoid requesting authentication multiple times.
modulename = 'ee'
_eeAlreadyLoaded = modulename in sys.modules

import ee

if _eeAlreadyLoaded:
    # google earth engine already imported and authenticated
    print('GEE already imported')
else:
    # first load: a token or log in will be asked from the web browser
    #ee.Authenticate()
    ee.Initialize()

import pandas as pd


In [None]:
"""
var table = ee.FeatureCollection([
  ee.Geometry.Point([10, 0]),
  ee.Geometry.Point([10, 0.1]),
  ee.Geometry.Point([10, 0.2])
])

var buffered = table.map(function (feature) {
  return feature.buffer(5000, 1)
})
"""

class FieldData:
    """Field (plot) data read from a CSV file, with helpers that build buffered
    Earth Engine features around the plots and export per-plot band statistics.

    Attributes set by the constructor:
      proj            projection passed to every ee.Geometry.Point that is created
      df              pandas DataFrame holding the plot rows
      indexLb         name of the 1-based identifier column added to df ("indexField")
      bufferredPoints ee.FeatureCollection built by createBufferedPoints, else None
      distance        buffer distance used by bufferPoint
      sampleSize      stored but not used by any method of this class
    """

    def __init__(self, csvFileName, proj):
        """Load the CSV, add the identifier column and export the augmented table.

        @param[in] csvFileName path of the CSV file containing the plot data
        @param[in] proj        projection of the plot coordinates
        """
        self.proj = proj
        # low_memory=False lets pandas infer every column dtype from the whole file
        self.df = pd.read_csv(csvFileName, low_memory=False)
        # keeps the original row positions as an extra column in the exported file
        self.df = self.df.reset_index()
        self.indexLb = "indexField"
        self.df[self.indexLb] = list(range(1, len(self.df) + 1))
        self.bufferredPoints = None
        # default buffer distance read by bufferPoint
        self.distance = 100
        self.sampleSize = 300
        self.exportPlotDataWithAddedIdentifiers("FieldDataWithIdentifiers.csv")

    def getLen(self):
        """Return the number of rows/plot data currently held in the dataframe."""
        return len(self.df.index)

    # Disabled 2nd constructor that can reset the class using a given dataframe.
    # Kept for reference only; it is never executed.
    #
    # def reset(self, dataframe, proj):
    #     self.proj = proj
    #     low_memory=False
    #     self.df = dataframe
    #     self.distance = 100
    #     if self.indexLb in self.df.columns:
    #        self.df.drop(columns=[self.indexLb])
    #     self.df[self.indexLb] = list(range(1,len(self.df)+1))
    #     self.bufferredPoints = None
    #     self.sampleSize = 300
    #     self.exportPlotDataWithAddedIdentifiers("FieldDataWithIdentifiers.csv")

    def getMinMaxYear(self, yearlabel):
        """Return the smallest and biggest year available within the field data.

        @param[in] yearlabel the name of the column that states the years
        @returns [min, max] the smallest and biggest year included
        @raises ValueError when the dataframe has no rows
        """
        years = self.df[yearlabel]
        if years.empty:
            raise ValueError("min() arg is an empty sequence")
        # Series.min/max skip missing values instead of giving an
        # order-dependent answer as the builtin min/max do with NaN
        return [years.min(), years.max()]

    def filterYearsOfInterest(self, startYear, endYear, yearlabel):
        """Keep the rows whose year lies in [startYear, endYear] (inclusive) and
        discard the rest. The dataframe is modified in place.

        @param[in] startYear first year to keep
        @param[in] endYear   last year to keep
        @param[in] yearlabel the name of the column containing the years
        """
        wantedYears = list(range(startYear, endYear + 1))
        self.df = self.df[self.df[yearlabel].isin(wantedYears)]
        # drop=True: re-number the rows without adding another index column,
        # so the method can be called any number of times
        self.df = self.df.reset_index(drop=True)
        return None

    def bufferPoint(self, feature):
        """Return feature.buffer(self.distance, 1).

        @param[in] feature object exposing buffer(distance, maxError)
                           (presumably an ee.Feature - confirm with callers)
        """
        return feature.buffer(self.distance, 1)

    def getYearOfInterest(self, year, yearlabel):
        """Return a new dataframe containing only the rows of the year of interest.

        @param[in] year      the year of interest
        @param[in] yearlabel the name of the column containing the years
        @returns dataframe re-indexed from 0; self.df is left untouched
        """
        selected = self.df[self.df[yearlabel] == year]
        return selected.reset_index(drop=True)

    def _bufferedFeatureCollection(self, x, y, indx, r):
        """Build a one-element ee.FeatureCollection: the point (x, y) buffered by r."""
        point = ee.Geometry.Point([x, y], self.proj)
        properties = {
            self.indexLb: indx,
            "system:index": "0"
        }
        return ee.FeatureCollection([ee.Feature(point, properties).buffer(r)])

    def createBufferedPoints(self, xlabel, ylabel, r, currentMin, currentMax):
        """Read the coordinates stored in xlabel and ylabel and create one buffered
        feature of radius r per row, for the rows in [currentMin, currentMax).

        The result is also stored in self.bufferredPoints. When the selected
        slice holds no rows, None is returned and self.bufferredPoints is left
        unchanged.

        @param[in] xlabel     the label of the column containing the x coordinates
        @param[in] ylabel     the label of the column containing the y coordinates
        @param[in] r          size of the radius (passed to ee.Feature.buffer)
        @param[in] currentMin first row position (inclusive) of the sample to load
        @param[in] currentMax last row position (exclusive) of the sample to load
        @returns the merged ee.FeatureCollection, or None when there are no rows
        """
        sample = self.df.iloc[currentMin:currentMax]
        if sample.empty:
            # covers both an empty dataframe and an empty slice
            return None

        merged = None
        for _, row in sample.iterrows():
            single = self._bufferedFeatureCollection(
                row[xlabel], row[ylabel], row[self.indexLb], r)
            # the first row seeds the collection, the others are merged into it
            merged = single if merged is None else merged.merge(single)

        self.bufferredPoints = merged
        return self.bufferredPoints

    def exportfeatureVectorsToDrive(self, collection, outCsvFeatureVectors, driveFolder, iscale):
        """Sample the bands of collection at the buffered points and prepare a
        CSV export to Google Drive.

        NOTE: the export task is created but deliberately NOT started
        (task.start() is commented out because batch commands are limited).

        @param[in] collection           object exposing toBands()
                                        (presumably an ee.ImageCollection - confirm)
        @param[in] outCsvFeatureVectors description/name of the export
        @param[in] driveFolder          Drive folder receiving the file
        @param[in] iscale               scale passed to sampleRegions
        """
        if self.bufferredPoints is None:
            print ("Plot data have not been read yet. Please call \"createBufferedPoints", \
            "(xlabel, ylabel, r)\" first")
            return
        collection = collection.toBands()
        bandNames = collection.bandNames()
        print (bandNames.getInfo())
        training = collection.sampleRegions(
            collection = self.bufferredPoints,
            properties = [self.indexLb],
            scale      = iscale,
            projection = self.proj,
            geometries = True
        )

        # TO DO: COMMENT WHEN NOT TESTING OUTPUT AS BATCH COMMANDS ARE LIMITED
        print("STARTING BATCH SCRIPT FOR EXPORTING FILE")
        task = ee.batch.Export.table.toDrive(
            collection=training,
            description=outCsvFeatureVectors,
            folder=driveFolder,
            fileFormat="CSV"
        )
        #task.start()
        print("END OF CALLING BATCH SCRIPT")

    def exportPlotDataWithAddedIdentifiers(self, nameOfNewPlotFile):
        """Write the current dataframe (identifier column included) to a CSV file.

        @param[in] nameOfNewPlotFile path of the CSV file to write
        """
        self.df.to_csv(nameOfNewPlotFile)

    def printFieldData(self):
        """Print the whole dataframe as text."""
        print(self.df.to_string())

    def getInfoRegions(self, collection, bandName, bpoints, scale=50):
        """Get mean and std for one band of an image for each buffered point.

        Each returned feature carries a 'bandName' property; features whose
        'mean' is null are filtered out.

        @param[in] collection object exposing select() and reduceRegions()
                              (presumably an ee.Image - confirm with callers)
        @param[in] bandName   name of the band to reduce
        @param[in] bpoints    feature collection of buffered points
        @param[in] scale      scale passed to reduceRegions (default 50)
        """
        meanAndStd = ee.Reducer.mean().combine(
            reducer2=ee.Reducer.stdDev(),
            sharedInputs=True
        )
        stats = collection.select([bandName]).reduceRegions(
            collection=bpoints.select(self.indexLb),
            reducer=meanAndStd,
            scale=scale
        )
        tagged = stats.map(lambda feature: feature.set('bandName', bandName))
        return tagged.filter(ee.Filter.neq('mean', None))

    @staticmethod
    def _keepStringBandNames(bandNames):
        """Return the entries of bandNames that are strings, in their original order."""
        return [band for band in bandNames if isinstance(band, str)]

    def getFeatureCollection(self, collection, bpoints):
        """Merge the per-band statistics of every band into one feature collection.

        @param[in] collection object exposing bandNames() plus whatever
                              getInfoRegions needs
        @param[in] bpoints    feature collection of buffered points
        """
        bandNamesImg = collection.bandNames().getInfo()
        print('Band names: ', bandNamesImg)
        # build a filtered copy: removing items from the list being iterated
        # silently skips the element that follows each removal
        bandNamesImg = self._keepStringBandNames(bandNamesImg)
        featureCollection = ee.FeatureCollection([])
        for band in bandNamesImg:
            features = self.getInfoRegions(collection, band, bpoints)
            featureCollection = featureCollection.merge(features)
        return featureCollection

    def _processMatches(self, row, statName):
        """Turn the 'matches' of a joined row into properties bandName -> statName value."""
        # Get the list of all features with a unique row ID.
        matches = ee.List(row.get('matches'))
        # Map a function over the list of rows to return a list of column ID and value.
        values = matches.map(
            lambda feature: [ee.Feature(feature).get('bandName'),
                             ee.Feature(feature).get(statName)])
        # Return the row with its ID property and properties for all matching
        # column IDs storing the output of the reducer.
        return row.select([self.indexLb]).set(ee.Dictionary(values.flatten()))

    def _formatTable(self, table, rowProcessor):
        """Format a table of triplets into a 2D table of rowId x colId."""
        # Get a FeatureCollection with unique row IDs.
        rows = table.distinct(self.indexLb)
        filterEq = ee.Filter.equals(leftField=self.indexLb, rightField=self.indexLb)
        innerJoin = ee.Join.saveAll('matches')
        joined = innerJoin.apply(primary=rows, secondary=table, condition=filterEq)
        return joined.map(algorithm=rowProcessor)

    def processMatchesMean(self, row):
        """Return the row with one property per band holding the 'mean' value."""
        return self._processMatches(row, 'mean')

    def formatMean(self, table):
        """Format a table of triplets into a 2D table of rowId x band (mean values)."""
        return self._formatTable(table, self.processMatchesMean)

    def processMatchesStd(self, row):
        """Return the row with one property per band holding the standard deviation."""
        # ee.Reducer.stdDev() names its output 'stdDev'
        return self._processMatches(row, 'stdDev')

    def formatStd(self, table):
        """Format a table of triplets into a 2D table of rowId x band (std values)."""
        return self._formatTable(table, self.processMatchesStd)

    def _startTableExport(self, table, description, driveFolder):
        """Create and start a CSV export of table to the given Drive folder."""
        task = ee.batch.Export.table.toDrive(
            collection=table,
            description=description,
            folder=driveFolder,
            fileFormat='CSV'
        )
        task.start()
        return task

    def exportFeaturesMeanStdCSV(self, collection, ouutCsvFeatureVectors, driveFolder):
        """Export two CSV tables to Drive: per-plot band means ("<name>_mean")
        and per-plot band standard deviations ("<name>_stdD").

        @param[in] collection            object passed to getFeatureCollection
        @param[in] ouutCsvFeatureVectors base name of the two export tasks
        @param[in] driveFolder           Drive folder receiving the files
        @raises Exception when createBufferedPoints has not produced any points yet
        """
        if self.bufferredPoints is None:
            raise Exception("Please call createBufferedPoints(xlabel, ylabel, r) function first" )
        featureCollection = self.getFeatureCollection(collection, self.bufferredPoints)

        tableMean = self.formatMean(featureCollection)
        tableStd  = self.formatStd(featureCollection)

        meanName = ouutCsvFeatureVectors + "_mean"
        stdName = ouutCsvFeatureVectors + "_stdD"
        print ("START EXPORTING FEATURES VECTORS OF A SINGLE FILE")
        self._startTableExport(tableMean, meanName, driveFolder)
        self._startTableExport(tableStd, stdName, driveFolder)
        print("STOP EXPORTING: CHECK PROGRESS ON GOOGLE EARTH ENGINE - FILES ON DRIVE")


In [None]:
# Confirmation message shown when this cell runs.
print("Class fieldData imported")