# Data Extractor

In [1]:
import ee

# Service account e-mail used for the Earth Engine session.
service_account = 'tec-semester@plantgrowthcycle.iam.gserviceaccount.com'

# Pair the account e-mail with its JSON key file (relative path).
credentials = ee.ServiceAccountCredentials(
    email=service_account,
    key_file='plantgrowthcycle-3acdcfc1f75c.json',
)

# Initialise the Earth Engine client with those credentials.
ee.Initialize(credentials)

import geetools
import geemap
import os

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.service_account import ServiceAccountCredentials

In [2]:
# Helper class that wraps the Earth Engine query, the map preview and the Drive export.

class DataExtractor():
    """Query, preview and export an Earth Engine image collection around a point.

    The methods build on each other's stored state and are meant to be called
    in this order: ``set_point`` -> ``set_interest_region`` ->
    ``get_data_visualization`` -> ``extract_data``. Calling a step before its
    prerequisite raises an explicit error instead of failing on ``None``.
    """

    def __init__(self, data_collection, start_date, end_date, bands):
        """Store the query parameters; no Earth Engine call is made here.

        Args:
            data_collection: collection id passed to ``ee.ImageCollection``.
            start_date: start of the window passed to ``filterDate``.
            end_date: end of the window passed to ``filterDate``.
            bands: list of band names to select and export; the first three
                are used as the bands of the map preview.
        """
        self.data_collection = data_collection
        self.start_date = start_date
        self.end_date = end_date
        self.bands = bands
        # Filled in by the later steps.
        self.point = None
        self.region = None
        self.image_collection = None
        self.Map = None

    def set_point(self, point):
        """Remember the centre point as a ``[longitude, latitude]`` pair."""
        self.point = point

    def set_interest_region(self, meters):
        """Build a rectangle around the stored point and show it on a new map.

        The point is buffered by ``meters`` and the bounding box of that
        buffer becomes ``self.region``.

        Args:
            meters: buffer distance around the point, in meters.

        Returns:
            The ``geemap.Map`` (also stored in ``self.Map``) with the point
            and the region added as layers.

        Raises:
            ValueError: if ``set_point`` has not been called yet.
        """
        if self.point is None:
            raise ValueError(
                "No point set: call set_point([lon, lat]) before set_interest_region()."
            )

        point = ee.Geometry.Point(self.point)
        self.region = point.buffer(meters).bounds()

        Map = geemap.Map()
        # setCenter takes (lon, lat), the same order as the stored point.
        Map.setCenter(self.point[0], self.point[1])
        Map.addLayer(point,
             {'color': 'black'},
             'Geometry [black]: point')

        Map.addLayer(self.region)

        self.Map = Map

        return self.Map

    def get_data_visualization(self, max_cloud_percentage=30):
        """Build the filtered image collection and add it to the stored map.

        The collection is restricted to the selected bands, the region of
        interest, the date window and images whose
        ``CLOUDY_PIXEL_PERCENTAGE`` property is below the threshold. Its size
        is printed, then it is added to the map as a hidden layer and as a
        time slider.

        Args:
            max_cloud_percentage: exclusive upper bound for the
                ``CLOUDY_PIXEL_PERCENTAGE`` filter (default 30).

        Raises:
            RuntimeError: if ``set_interest_region`` has not been called yet.
        """
        if self.region is None or self.Map is None:
            raise RuntimeError(
                "No region/map available: call set_interest_region() before "
                "get_data_visualization()."
            )

        self.image_collection = (
            ee.ImageCollection(self.data_collection)
            .select(self.bands)
            .filterBounds(self.region)
            .filterDate(self.start_date, self.end_date)
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', max_cloud_percentage))
        )

        print(f'The size of the collection is: {self.image_collection.size().getInfo()}')

        # Only the first three selected bands are used for the preview.
        visualization = {
            "bands" : self.bands[0:3],
            "min": 300,
            "max": 3500
        }

        self.Map.addLayer(
            ee_object=self.image_collection,
            vis_params=visualization,
            name="Sentinel-2",
            shown=False)

        self.Map.add_time_slider(
            ee_object=self.image_collection,
            vis_params=visualization,
            time_interval=5
        )

    def extract_data(self, folder_name = "ClimateDate"):
        """Export every image of the collection to Google Drive, band by band.

        For each band in ``self.bands`` the collection is reduced to that
        single band and handed to geetools' image-collection Drive export,
        clipped to ``self.region``.

        Args:
            folder_name: Drive folder that receives the exported images.

        Raises:
            RuntimeError: if ``get_data_visualization`` has not been called yet.
        """
        if self.image_collection is None or self.region is None:
            raise RuntimeError(
                "No image collection available: call get_data_visualization() "
                "before extract_data()."
            )

        for band in self.bands:
            # Values substituted into the {sat} and {band} fields of namePattern.
            extra = dict(sat="S-HARMONIZED", band=band)
            geetools.batch.Export.imagecollection.toDrive(
                collection=self.image_collection.select(band),
                region=self.region,
                namePattern="{sat}_{id}_{system_date}_{band}",
                datePattern = "ddMMMy",
                dataType="int",
                folder=folder_name,
                extra=extra,
                verbose=True
            )

In [3]:
# --- Extraction settings ---
# Earth Engine collection to query.
data_collection = "COPERNICUS/S2_SR_HARMONIZED"

# Google Drive folder that receives the exported images.
folder_name = "ClimateAiData"

# Date window for the query (start, end).
start_date, end_date = "2021-08-01", "2021-09-01"

# Bands to select and export; the first three (B4, B3, B2) drive the map preview.
bands = ["B4", "B3", "B2", "B8"]

# Candidate sites as [longitude, latitude], grouped by area name and point id.
coordinates = {
    "Germany": {"0": [10.109178, 52.295949]},
    "Iowa": {"0": [-91.47321072391104, 41.87728596488476]},
    "Iowa2": {"0": [-93.78163323429821, 41.75739907371553]},
}

In [4]:
# Build the extractor from the settings defined in the parameters cell.
D = DataExtractor(data_collection, start_date, end_date, bands)

# Centre on the "Iowa2" site and frame it with a 1500 m buffer box.
# The map returned by the last call is the cell's final expression, so the notebook renders it.
D.set_point(coordinates["Iowa2"]["0"])
D.set_interest_region(1500)

Map(center=[41.75739907371553, -93.78163323429821], controls=(WidgetControl(options=['position', 'transparent_…

In [5]:
# Build the filtered collection, print its size and add it to the map with a time slider.
D.get_data_visualization()

The size of the collection is: 10


In [6]:
# Send the collection to the Drive folder, one export call per band.
D.extract_data(folder_name)

exporting S-HARMONIZED_20210802T170849_20210802T171859_T15TVG_02Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210804T165901_20210804T170637_T15TVG_04Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210809T165849_20210809T171029_T15TVG_09Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210812T170849_20210812T171908_T15TVG_12Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210814T165901_20210814T170626_T15TVG_14Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210817T170851_20210817T171432_T15TVG_17Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210819T165849_20210819T170004_T15TVG_19Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210822T170849_20210822T171856_T15TVG_22Aug2021_B4 to folder 'ClimateAiData' in GDrive
exporting S-HARMONIZED_20210824T165851_20210824T170900_T15TVG_24Aug2021_B4 to folder 'ClimateAiData' in GDrive
e

In [13]:
# Build a PyDrive client that acts as the service account.
scopes = ['https://www.googleapis.com/auth/drive']
gauth = GoogleAuth()
gauth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
    "plantgrowthcycle-3acdcfc1f75c.json",
    scopes=scopes,
)
drive = GoogleDrive(gauth)

# Ask for non-trashed entries that are not folders.
file_list = drive.ListFile({
    'q': "trashed=False and mimeType != 'application/vnd.google-apps.folder'",
    'maxResults': 100,
})

# Materialise the listing into `files` for the download step.
files = file_list.GetList()

In [14]:
# Pull every listed Drive file into the working directory, then remove it from Drive.
for drive_file in files:
    filename = drive_file['title']
    print(filename)

    # Save locally under the Drive title, requesting TIFF content.
    drive_file.GetContentFile(filename, mimetype="image/tiff")

    # Drop the remote copy once it is downloaded so the Drive stays empty.
    drive_file.Delete()