# Data Extractor

In [None]:
import ee

# Service account e-mail used to authenticate against Earth Engine.
service_account = 'tec-semester@plantgrowthcycle.iam.gserviceaccount.com'

# The key file is given as a bare file name, so it is resolved relative to the
# directory the notebook runs in.
credentials = ee.ServiceAccountCredentials(
    service_account,
    'plantgrowthcycle-3acdcfc1f75c.json')

# Must run before any other `ee` call made further down in this notebook.
ee.Initialize(credentials)

import geetools
import geemap
import os

from typing import Union, List, Dict
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.service_account import ServiceAccountCredentials

In [None]:
class DataExtractor():
    """
    Select Earth Engine imagery around a point, preview it and export it to Drive.

    The methods are meant to be called in this order, each one storing its
    result on the instance for the next step:
    ``set_point`` -> ``set_interest_region`` -> ``get_data_visualization``
    -> ``extract_data``.
    """

    def __init__(self, data_collection, start_date, end_date, bands):
        """
        Store the query parameters; nothing is requested from Earth Engine here.

        Args:
            data_collection: Identifier handed to ``ee.ImageCollection``.
            start_date: Start of the time window handed to ``filterDate``.
            end_date: End of the time window handed to ``filterDate``.
            bands: Band names to select. The first three are used as the
                display bands of the map layers.
        """
        self.data_collection = data_collection
        self.start_date = start_date
        self.end_date = end_date
        self.bands = bands
        # Filled in by the set_*/get_* methods below.
        self.point = None
        self.region = None
        self.image_collection = None
        self.Map = None

    def set_point(self, point) -> None:
        """
        Store the centre point with its two coordinates swapped.

        The stored value is later passed unchanged to ``ee.Geometry.Point``
        and to ``Map.setCenter(point[0], point[1])``.
        NOTE(review): the coordinates table in this notebook presumably holds
        [latitude, longitude] pairs and Earth Engine expects longitude first,
        which would explain the swap - confirm.

        The caller's sequence is left untouched (a reversed copy is stored),
        so lists and tuples are both accepted and shared coordinate tables
        are never modified as a side effect.

        Args:
            point: Two-element sequence of coordinates.
        """
        self.point = list(reversed(point))

    def set_interest_region(self, meters):
        """
        Build the region of interest around the stored point and show it on a map.

        The region is the bounding box of a buffer of ``meters`` around the
        point. A new ``geemap.Map`` centred on the point (zoom 15) is created
        with the point and the region added as layers.

        Args:
            meters: Buffer distance handed to ``ee.Geometry.buffer``.

        Returns:
            The ``geemap.Map`` instance, also stored in ``self.Map``.

        Raises:
            RuntimeError: If ``set_point`` has not been called yet.
        """
        if self.point is None:
            raise RuntimeError("Call set_point() before set_interest_region().")

        center = ee.Geometry.Point(self.point)
        self.region = center.buffer(meters).bounds()

        self.Map = geemap.Map()
        self.Map.setCenter(self.point[0], self.point[1], zoom=15)
        self.Map.addLayer(center,
             {'color': 'black'},
             'Geometry [black]: point')
        self.Map.addLayer(self.region)

        return self.Map

    def get_data_visualization(self) -> None:
        """
        Build the filtered image collection and add it to the map.

        The collection is restricted to the configured bands, the region of
        interest, the configured date window and images whose
        'CLOUDY_PIXEL_PERCENTAGE' property is below 70. Its size is printed
        (this requires a round trip to Earth Engine via ``getInfo``), then it
        is added to the map as a hidden "Sentinel-2" layer and as a time slider.

        Raises:
            RuntimeError: If ``set_interest_region`` has not been called yet.
        """
        if self.region is None or self.Map is None:
            raise RuntimeError(
                "Call set_interest_region() before get_data_visualization()."
            )

        self.image_collection = (
            ee.ImageCollection(self.data_collection)
            .select(self.bands)
            .filterBounds(self.region)
            .filterDate(self.start_date, self.end_date)
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 70))
        )

        print(f'The size of the collection is: {self.image_collection.size().getInfo()}')

        # Only the first three bands are rendered; any further band is still
        # part of the collection and therefore of the export.
        visualization = {
            "bands" : self.bands[0:3],
            "min": 300,
            "max": 3500
        }

        self.Map.addLayer(
            ee_object=self.image_collection,
            vis_params=visualization,
            name="Sentinel-2",
            shown=False)

        self.Map.add_time_slider(
            ee_object=self.image_collection,
            vis_params=visualization,
            time_interval=5
        )

    def extract_data(self, folder_name: str = "ClimateDate") -> None:
        """
        Send the image collection to Drive, one export call per band.

        Each call exports the single-band version of the collection, clipped
        to the region of interest, with file names following
        "{sat}_{id}_{system_date}_{band}".

        Args:
            folder_name: Destination folder handed to the Drive export.

        Raises:
            RuntimeError: If ``get_data_visualization`` has not been called yet.
        """
        if self.image_collection is None:
            raise RuntimeError("Call get_data_visualization() before extract_data().")

        for band in self.bands:
            # Values substituted into the {sat} and {band} name placeholders.
            extra = dict(sat="S-HARMONIZED", band=band)
            geetools.batch.Export.imagecollection.toDrive(
                collection=self.image_collection.select(band),
                region=self.region,
                namePattern="{sat}_{id}_{system_date}_{band}",
                datePattern = "ddMMMy",
                dataType="int",
                folder=folder_name,
                extra=extra,
                verbose=True
            )

In [None]:
# Parameters.
# Earth Engine collection identifier handed to DataExtractor.
data_collection = "COPERNICUS/S2_SR_HARMONIZED"
# Bands to select and export. The first three are the ones DataExtractor uses
# as display bands (bands[0:3]); "B8" is selected and exported but not displayed.
bands =  ["B4","B3","B2","B8"] # B4, B3, B2 are the display (RGB) bands.

# TODO: Use a database entry.
# Destination folder on Drive for the exported images.
# NOTE(review): the name mentions Manitoba while the location selected further
# down is "Kansas1" - confirm the folder matches the location being exported.
folder_name = "ClimateAiDataManitoba19"
# Time window handed to filterDate.
start_date = "2019-04-01"
end_date = "2019-12-01"
# Coordinates of the fields, keyed by location name and then by an index string.
# Each entry is a two-element list; DataExtractor.set_point swaps the two values
# before use, so these are presumably [latitude, longitude] - confirm.
coordinates = {
    
    # Old trials
    # NOTE(review): this longitude (-79.9) is far from the "Iowa2" entry (-93.8);
    # verify that the name or the coordinates are correct.
    "Iowa1" : {
        "0" : [43.027634245865684,-79.89996820467965],
    },
 
    # New trials
    "Iowa2" : {
        "0" : [41.75739907371553,-93.78163323429821],
    },
    "Wisconsin" : {
        "0" : [42.69094494303969,-88.82790109139866],
    },
    "Manitoba1" : {
        "0" : [ 49.48420963201276,-98.18843893508559],
    },
    "Manitoba2" : {
        "0" : [ 49.4361923859666,-98.1033562239458],
    },
    "RepoAjeno" : {
        "0" : [ 46.13542422542973, -100.57160713271173],
    },
    "Kansas1" : {
        "0" : [38.14223443436649, -97.72387049855455]}
    
}

In [None]:
# Create the extractor with the parameters defined in the previous cell.
D = DataExtractor(
    data_collection=data_collection,
    start_date=start_date,
    end_date=end_date,
    bands=bands
)

# Select which location to study.
# A copy is taken so the shared `coordinates` table is never modified by set_point.
center_of_field = coordinates["Kansas1"]["0"].copy()

# NOTE: set_point has no return statement, so `point` is bound to None here;
# the point itself is stored on the extractor as D.point.
point=D.set_point(center_of_field)

# Build the region of interest from a 400 m buffer around the point. The call
# returns the map, which the notebook presumably renders as the cell output.
D.set_interest_region(400)


In [None]:
# Build the filtered image collection and add it (layer + time slider) to D.Map.
D.get_data_visualization()

In [None]:
# Export the collection to the Drive folder `folder_name`, one export call per band.
D.extract_data(folder_name)

In [None]:
# Authenticate to Google Drive (of the Service account).
# Scope requested for the service-account credentials.
scopes = ['https://www.googleapis.com/auth/drive']

# The credentials are loaded from the JSON key file (same file name as the one
# used for Earth Engine above) and assigned directly to the GoogleAuth object.
gauth = GoogleAuth()
gauth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
    "plantgrowthcycle-3acdcfc1f75c.json"
    , scopes=scopes
)

# Module-level Drive client used by all the *_from_Drive helpers below.
drive = GoogleDrive(gauth)

In [None]:
def retrieve_data_from_Drive(
    folder_name: Union[str, None]
) -> None:
    """
    Download every file of a top-level Drive folder.

    The folder is looked up by title among the non-trashed entries directly
    under the Drive root. If several entries share the title, the last one
    listed is used. Each non-trashed file inside the folder is then saved
    locally under its Drive title, in title order, with progress printed
    per file.

    If no entry matches ``folder_name`` a message is printed and nothing is
    downloaded.

    Args:
        folder_name: Title of the Drive folder to download.
    """
    # Only entries directly under the Drive root are searched.
    root_entries = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()

    folder_id = None
    for folder in root_entries:
        if folder['title'] == folder_name:
            folder_id = folder['id']
            print('title: %s, id: %s' % (folder['title'], folder['id']))
            print("--------------------------------------------------------")

    # Without this guard a missing folder reached the download loop with
    # `file_list` never assigned and crashed with UnboundLocalError.
    if not folder_id:
        print("Folder not found on Drive.")
        return

    file_list = drive.ListFile({'q': "'{}' in parents and trashed=false".format(folder_id)}).GetList()

    for i, file1 in enumerate(sorted(file_list, key = lambda x: x['title']), start=1):
        print('Downloading {} from GDrive ({}/{})'.format(file1['title'], i, len(file_list)))
        # The Drive title doubles as the local file name.
        file1.GetContentFile(file1['title'])

In [None]:
def see_data_from_Drive() -> None:
    """
    Print the title and id of every non-trashed entry directly under the Drive root.

    Entries are requested ordered by title; a dashed separator line is
    printed after each one.
    """
    query = {'q': "'root' in parents and trashed=false",'orderBy':'title'}
    separator = "-------------------------------------------------------------------------"
    for entry in drive.ListFile(query).GetList():
        print('title: %s, id: %s' % (entry['title'], entry['id']))
        print(separator)
    
def delete_folder_from_Drive(
    folder_id: Union[str, None]) -> None:
    """
    Delete the top-level Drive entry whose id equals ``folder_id``.

    Only non-trashed entries directly under the Drive root are considered.
    For each match the entry is deleted and a confirmation showing its title
    and the first half of its id is printed. If nothing matches, a
    "not found" message is printed instead.

    NOTE(review): this calls ``Delete()`` rather than a trash operation, so
    the removal is presumably permanent - confirm against the PyDrive docs.

    Args:
        folder_id: Drive id of the folder to delete.
    """
    root_entries = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()

    deleted_any = False
    for entry in root_entries:
        if entry['id'] != folder_id:
            continue
        deleted_any = True
        entry.Delete()
        entry_id = entry['id']
        # Only the first half of the id is echoed back in the message.
        shortened_id = entry_id[0:len(entry_id) // 2]
        print("Folder: %s with id: %s... successfully deleted." % (entry['title'], shortened_id))

    if not deleted_any:
        print("Folder not found on Drive.")
        
def rename_folder_from_Drive(folder_id: str, new_name: str) -> None:
    """
    Rename the Drive entry with id ``folder_id`` to ``new_name``.

    The entry's metadata is fetched, its "title" replaced and the result sent
    back through the underlying Drive service. Failures are reported on
    stdout together with their cause instead of being raised, so a notebook
    cell calling this keeps running.

    Args:
        folder_id: Drive id of the folder to rename. An empty value is
            rejected up front without contacting Drive.
        new_name: New title for the folder.
    """
    if not folder_id:
        print("No folder id given; nothing was renamed.\n")
        return

    try:
        # The listing result is unused.
        # NOTE(review): this call is kept because it may be what ensures
        # `drive.auth.service` is initialized before it is used directly
        # below - confirm before removing it.
        drive.ListFile({'q': "'root' in parents and trashed=false",'orderBy':'title'}).GetList()

        service = drive.auth.service
        folder = service.files().get(fileId=folder_id).execute()
        folder["title"] = new_name
        service.files().update(fileId=folder_id, body=folder).execute()
    except Exception as err:
        # The previous bare `except:` also swallowed KeyboardInterrupt and
        # reported every failure (auth, network, ...) as "not found".
        print("Could not rename folder %r (not found or request failed): %s\n" % (folder_id, err))

In [None]:
# Download every file of this Drive folder, each saved under its Drive title.
retrieve_data_from_Drive("ClimateAiData-Manitoba-0.3CP-2019")

In [None]:
# List the title and id of every entry under the Drive root.
see_data_from_Drive()

In [None]:
# Destructive: deletes the Drive entry with this id. Check the id against the
# listing above before running this cell.
delete_folder_from_Drive("1OMdcVMTmCk50EPF3CM0aF5eEj0Lp8Igm")

In [None]:
# Id of the folder to rename. It is empty here, so it must be filled in (for
# example from the listing printed by see_data_from_Drive) before running.
folder_id=""
# New title for that folder.
name= "ClimateAiData-Manitoba-0.7CP-2020"

rename_folder_from_Drive(folder_id,name)

# List the Drive root again to check the result of the rename.
see_data_from_Drive()