# Data Extractor

In [12]:
import ee

# E-mail address of the service account used to talk to Earth Engine.
service_account = 'tec-semester@plantgrowthcycle.iam.gserviceaccount.com'

# Build the credentials from the account e-mail and its JSON key file
# (path is relative to the notebook's working directory), then initialise
# the Earth Engine client with them.
credentials = ee.ServiceAccountCredentials(
    email=service_account,
    key_file='plantgrowthcycle-3acdcfc1f75c.json',
)
ee.Initialize(credentials)

import geetools
import geemap
import os

from typing import Union, List, Dict
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.service_account import ServiceAccountCredentials

In [38]:
class DataExtractor():
    """
    Select, preview and export an Earth Engine image collection around a point.

    Intended call order:
    ``set_point`` -> ``set_interest_region`` -> ``get_data_visualization``
    -> ``extract_data``. Each step stores its result on the instance, and the
    later steps raise a clear error when a prerequisite step was skipped.

    Attributes:
        data_collection: Earth Engine collection id passed to ``ee.ImageCollection``.
        start_date, end_date: bounds passed to ``filterDate``.
        bands: list of band names to select; the first three are used for display.
        point: coordinate pair set by ``set_point`` (``None`` until then).
        region: bounding rectangle built by ``set_interest_region``.
        image_collection: filtered collection built by ``get_data_visualization``.
        Map: ``geemap.Map`` created by ``set_interest_region``.
    """

    def __init__(self, data_collection, start_date, end_date, bands):
        self.data_collection = data_collection
        self.start_date = start_date
        self.end_date = end_date
        self.bands = bands
        # Filled in by the later steps.
        self.point = None
        self.region = None
        self.image_collection = None
        self.Map = None

    def set_point(self, point):
        """ Store the coordinate pair the region of interest is centred on. """
        self.point = point

    def set_interest_region(self, meters):
        """
        Build the region of interest and a map showing it.

        The stored point is buffered by ``meters`` and the bounding rectangle
        of that buffer becomes ``self.region``. A new ``geemap.Map`` centred
        on the point (with the point and the region as layers) is stored in
        ``self.Map`` and returned.

        Raises:
            ValueError: if ``set_point`` has not been called yet.
        """
        if self.point is None:
            raise ValueError(
                "No point defined: call set_point() before set_interest_region()."
            )

        point = ee.Geometry.Point(self.point)
        self.region = point.buffer(meters).bounds()

        Map = geemap.Map()
        Map.setCenter(self.point[0], self.point[1], zoom=15)
        Map.addLayer(point,
             {'color': 'black'},
             'Geometry [black]: point')

        Map.addLayer(self.region)

        self.Map = Map

        return self.Map

    def get_data_visualization(self, max_cloud_percentage=30):
        """
        Build the filtered image collection and add it to the map.

        The collection is restricted to the region of interest, the date
        range and images whose ``CLOUDY_PIXEL_PERCENTAGE`` property is below
        ``max_cloud_percentage``, then reduced to ``self.bands``. Its size is
        printed, and the collection is added to ``self.Map`` both as a hidden
        layer and as a time slider.

        Args:
            max_cloud_percentage: exclusive upper bound on the
                ``CLOUDY_PIXEL_PERCENTAGE`` image property (default 30).

        Raises:
            RuntimeError: if ``set_interest_region`` has not been called yet.
        """
        if self.region is None or self.Map is None:
            raise RuntimeError(
                "No region/map defined: call set_interest_region() "
                "before get_data_visualization()."
            )

        # Metadata filters first, band selection last: the resulting
        # collection is the same, but this is the order Earth Engine recommends.
        self.image_collection = (
            ee.ImageCollection(self.data_collection)
            .filterBounds(self.region)
            .filterDate(self.start_date, self.end_date)
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', max_cloud_percentage))
            .select(self.bands)
        )

        print(f'The size of the collection is: {self.image_collection.size().getInfo()}')

        # Only the first three selected bands are rendered.
        visualization = {
            "bands" : self.bands[0:3],
            "min": 300,
            "max": 3500
        }

        self.Map.addLayer(
            ee_object=self.image_collection,
            vis_params=visualization,
            name="Sentinel-2",
            shown=False)

        self.Map.add_time_slider(
            ee_object=self.image_collection,
            vis_params=visualization,
            time_interval=5
        )

    def extract_data(self, folder_name = "ClimateDate"):
        """
        Export the image collection to Google Drive, one band at a time.

        For every band in ``self.bands`` a geetools batch export of the
        single-band collection, clipped to ``self.region``, is started into
        the Drive folder ``folder_name``.

        Raises:
            RuntimeError: if ``get_data_visualization`` has not been called yet.
        """
        if self.image_collection is None:
            raise RuntimeError(
                "No image collection defined: call get_data_visualization() "
                "before extract_data()."
            )

        for band in self.bands:
            # Values substituted into the {sat} and {band} name placeholders.
            extra = dict(sat="S-HARMONIZED", band=band)
            geetools.batch.Export.imagecollection.toDrive(
                collection=self.image_collection.select(band),
                region=self.region,
                namePattern="{sat}_{id}_{system_date}_{band}",
                datePattern = "ddMMMy",
                dataType="int",
                folder=folder_name,
                extra=extra,
                verbose=True
            )

In [46]:
# Extraction parameters.

# Earth Engine collection to query.
data_collection = "COPERNICUS/S2_SR_HARMONIZED"
# Bands to select: red, green, blue (B4/B3/B2) plus B8.
bands = ["B4", "B3", "B2", "B8"]

# TODO: Use a database entry.
# Name of the Drive folder the exports are written to.
folder_name = "ClimateAiDataManitoba"

# Date range of the images to keep.
start_date, end_date = "2020-04-01", "2021-04-01"

# Candidate sites: site name -> point id ("0") -> coordinate pair.
coordinates = {
    site: {"0": lon_lat}
    for site, lon_lat in (
        # Old trials
        ("Germany", [10.109178, 52.295949]),
        # NOTE(review): these coordinates do not look like they fall inside
        # Iowa - confirm the label.
        ("Iowa1", [-79.89996820467965, 43.027634245865684]),
        # New trials
        ("Iowa2", [-93.78163323429821, 41.75739907371553]),
        ("Iowa3", [-91.4897364590952, 41.824337504215926]),
        ("Wisconsin", [-88.82790109139866, 42.69094494303969]),
        ("Manitoba", [-98.18843893508559, 49.48420963201276]),
    )
}

In [48]:
# Build the extractor for the configured collection, date range and bands.
D = DataExtractor(
    data_collection=data_collection,
    start_date=start_date,
    end_date=end_date,
    bands=bands
)

# set_point() only stores the coordinates and returns None, so its result
# is not kept.
D.set_point(coordinates["Iowa2"]["0"])
# Buffer the point by 800 m. This is the last expression of the cell, so the
# returned map is rendered as the cell output.
D.set_interest_region(800)


Map(center=[41.75739907371553, -93.78163323429821], controls=(WidgetControl(options=['position', 'transparent_…

In [49]:
# Build the filtered image collection, print its size and add it (with a
# time slider) to the map created by set_interest_region().
D.get_data_visualization()

The size of the collection is: 58


In [None]:
# Start the per-band exports of the collection into the configured Drive folder.
D.extract_data(folder_name=folder_name)

In [None]:
# Build a PyDrive client for the service account's Google Drive.
# Access scope requested for the Drive API.
scopes = ['https://www.googleapis.com/auth/drive']

gauth = GoogleAuth()
# The credentials are loaded from the service-account JSON key file and
# attached directly to the GoogleAuth object.
gauth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
    filename="plantgrowthcycle-3acdcfc1f75c.json",
    scopes=scopes,
)

drive = GoogleDrive(auth=gauth)

In [None]:
def retrieve_data_from_Drive(
    folder_name: Union[str, None]
) -> None:
    """
    Download every file of a Drive folder.

    The folder is looked up by title among the non-trashed entries directly
    under the Drive root, using the module-level ``drive`` client. Its files
    are then downloaded in title order, each one saved under its Drive title
    (a relative path, so it lands in the current working directory).

    Args:
        folder_name: title of the Drive folder to download.

    Raises:
        FileNotFoundError: if no entry under the Drive root has that title.
    """
    # Get the list of all entries under the Drive root.
    folder_list = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()

    # Folder ID (if several entries share the title, the last one wins).
    folder_id = None

    for folder in folder_list:
        if folder['title'] == folder_name:
            folder_id = folder['id']
            print('title: %s, id: %s' % (folder['title'], folder['id']))
            print("----------------------------------------------")

    # Fail with a clear message instead of hitting an unbound `file_list` below.
    if folder_id is None:
        raise FileNotFoundError(
            "No folder titled {!r} found under the Drive root.".format(folder_name)
        )

    file_list = drive.ListFile({'q': "'{}' in parents and trashed=false".format(folder_id)}).GetList()
    total = len(file_list)

    for i, file1 in enumerate(sorted(file_list, key = lambda x: x['title']), start=1):
        print('Downloading {} from GDrive ({}/{})'.format(file1['title'], i, total))
        file1.GetContentFile(file1['title'])

In [None]:
# Download the exported files from the configured Drive folder.
retrieve_data_from_Drive(folder_name=folder_name)