In [None]:
# import libraries
import time
start_data_acquistion = time.time()
import os
import ee
import json
import shutil
import geemap
import rasterio as rio
import geopandas as gpd
from fiona.crs import from_epsg
from shapely.geometry import Polygon

# Trigger the Earth Engine authentication flow; this runs before the library is initialized below.
ee.Authenticate()
 
# Initialize the Earth Engine client library so the ee.* calls in the following cells can be used.
ee.Initialize()

##### User input

In [None]:
### Input paths
# Local paths: shpPath is the AOI shp/geojson file that is read when draw_poly is False.
basePath = ''
shpName = ''
shpPath = os.path.join(basePath, shpName)
epsg = 'EPSG:32629'

### Output directory
out_dir = os.path.join(basePath, 'downloaded_data')  # use when the script is running on the local system
# out_dir = os.path.expanduser('~/Onedrive/Desktop/Little_place_labs/office/data')  # use when the script is running on colab

# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create race.
os.makedirs(out_dir, exist_ok=True)

# S3 destination (only used when upload is True)
bucket_name = ''
drive_path = ''

# Supported values for data_type (the type of data to download) and their catalog pages:
# 1. sentinal_SR: https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR
# 2. sentinal_SR_harmonized: https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR_HARMONIZED
# 3. landsat8_SR: https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C02_T1_L2
# 4. landsat8_RAW: https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C02_T1
# 5. modis_SR_500: https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD09GA
# 6. modis_SR_250: https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD09GQ

data_type = 'sentinal_SR_harmonized'  # 'sentinal_SR', 'sentinal_SR_harmonized', 'landsat8_SR', 'landsat8_RAW', 'modis_SR_500', 'modis_SR_250'
start_date = '2023-08-10'  # start date of the data search (YYYY-MM-DD)
end_date = '2023-08-11'  # end date of the data search (YYYY-MM-DD)
draw_poly = False  # True: draw the AOI manually on the basemap; False: read the AOI from the shp/geojson at shpPath. Defaults to False
remove = False  # True: delete the local download folder (out_dir) at the end of the run; False: keep it. Defaults to False
upload = False  # True: upload the downloaded data to S3 using bucket_name/drive_path; False: do not upload. Defaults to False

In [None]:
# Settings for every supported data_type:
#   (Earth Engine collection id, cloud-cover metadata property, cloud-cover threshold, per-image step)
# The cloud-cover entries are None for MODIS because no cloud filter is applied to it downstream.
dataset_settings = {
    'sentinal_SR': ('COPERNICUS/S2_SR', 'CLOUDY_PIXEL_PERCENTAGE', 10, 'cloudmask'),
    'sentinal_SR_harmonized': ('COPERNICUS/S2_SR_HARMONIZED', 'CLOUDY_PIXEL_PERCENTAGE', 20, 'pass'),
    'landsat8_SR': ('LANDSAT/LC08/C02/T1_L2', 'CLOUD_COVER', 10, 'scalefactor'),
    'landsat8_RAW': ('LANDSAT/LC08/C02/T1', 'CLOUD_COVER', 10, 'pass'),
    'modis_SR_500': ('MODIS/061/MOD09GA', None, None, 'pass'),
    'modis_SR_250': ('MODIS/061/MOD09GQ', None, None, 'pass'),
}

# Fail fast on a typo instead of continuing with undefined (or stale, from a
# previous kernel run) values of data / function / cloud settings.
if data_type not in dataset_settings:
    raise ValueError(
        f"Unsupported data_type {data_type!r}; expected one of: "
        + ", ".join(dataset_settings)
    )

collection_id, cloud_coverage, percentage_coludcover, function = dataset_settings[data_type]

# The mission is the part of data_type before the first underscore, e.g. 'landsat8'.
mission_name = data_type.split('_')[0]
data = ee.ImageCollection(collection_id)
out_scale = 'default'

##### Functions

In [None]:
def maskS2clouds(image):
    '''
    Function to mask cloudy pixels of a Sentinel-2 image using its QA60 band
    and divide the masked image by 10000
    Input parameters:
    1. param {ee.Image} image Sentinel-2 image

    Output:
    Return {ee.Image} cloud masked Sentinel-2 image divided by 10000
    '''
    # Bit positions inside QA60: 10 flags clouds, 11 flags cirrus.
    cloud_bit, cirrus_bit = 10, 11
    qa_band = image.select('QA60')

    # A pixel is clear only when both flags are zero.
    no_cloud = qa_band.bitwiseAnd(1 << cloud_bit).eq(0)
    no_cirrus = qa_band.bitwiseAnd(1 << cirrus_bit).eq(0)
    clear_sky = no_cloud.And(no_cirrus)

    masked = image.updateMask(clear_sky)
    return masked.divide(10000)

def dataformat(image):
    '''
    Function to rescale an image by dividing it by 10000
    Input parameters:
    1. param {ee.Image} image image to rescale

    Output:
    Return {ee.Image} image divided by 10000
    '''
    rescaled = image.divide(10000)
    return rescaled

def applyScaleFactors(image):
    '''
    Function to apply the scale and offset factors to the 'SR_B.' and 'ST_B.*'
    bands of a landsat8 image
    Input parameters:
    1. param {ee.Image} image landsat8 image

    Output:
    Return {ee.Image} landsat8 image with the rescaled bands added back
    '''
    # Bands selected by the 'SR_B.' pattern: value * 0.0000275 - 0.2
    sr_selection = image.select('SR_B.')
    sr_scaled = sr_selection.multiply(0.0000275).add(-0.2)

    # Bands selected by the 'ST_B.*' pattern: value * 0.00341802 + 149.0
    st_selection = image.select('ST_B.*')
    st_scaled = st_selection.multiply(0.00341802).add(149.0)

    # NOTE(review): None/True are presumably addBands' `names` and `overwrite`
    # arguments, i.e. the rescaled bands replace the originals - confirm.
    with_optical = image.addBands(sr_scaled, None, True)
    return with_optical.addBands(st_scaled, None, True)

def s3_upload(bucket_name, local_path, drive_path):
    '''
    Function to upload every regular file of a local directory to the desired location on s3
    Input parameters
    1. bucket_name: Define the name of the s3 bucket where data needs to be uploaded
    2. local_path: Location of the directory in the local instance
    3. drive_path: Location of the directory (key prefix) in the s3 bucket

    Notes:
    - Entries whose name starts with '.' and sub-directories are skipped.
    - Nothing is returned; errors raised by boto3 propagate to the caller.
    '''
    # Imported locally because the notebook's import cell does not import boto3;
    # without this the function fails with a NameError.
    import posixpath

    import boto3

    print('Data is uploading from local to s3....')
    s3 = boto3.client('s3')
    for file in sorted(os.listdir(local_path)):
        if file.startswith('.'):
            continue
        local_file_path = os.path.join(local_path, file)
        # upload_file needs a regular file; directories would make it fail.
        if not os.path.isfile(local_file_path):
            continue
        # S3 keys use '/' regardless of the local OS path separator.
        drive_file_path = posixpath.join(drive_path, file)
        s3.upload_file(Filename=local_file_path, Bucket=bucket_name, Key=drive_file_path)

##### Add Base Map

In [None]:
# Show an interactive basemap so the study area (the area whose data needs to be
# downloaded) can be drawn with the drawing tool. Only needed when draw_poly is True.
if draw_poly:
    Map = geemap.Map()
    display(Map)

In [None]:
# Build the region of interest (roi) either from the shape drawn on the basemap
# or from the AOI file at shpPath, then fetch its coordinates as `geometry`.
if draw_poly:
    roi = Map.user_roi  # user-defined roi drawn on the basemap
    if roi is None:
        raise ValueError(
            'No region was drawn on the map; draw a polygon on the basemap and re-run this cell.'
        )
else:
    gdf = gpd.read_file(shpPath)
    if gdf.empty:
        raise ValueError(f'No features found in AOI file: {shpPath!r}')

    # The GeoJSON handed to Earth Engine carries no CRS and is read as lon/lat,
    # so a projected AOI must be converted to WGS84 (EPSG:4326) first.
    gdf_wgs84 = gdf
    if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
        gdf_wgs84 = gdf.to_crs(epsg=4326)

    js = json.loads(gdf_wgs84.to_json())
    roi = ee.Geometry(ee.FeatureCollection(js).geometry())

# Coordinates of the roi, used by the spatial filter in the next section.
geometry = roi.getInfo()

##### Load Earth Engine datasets

In [None]:
# Restrict the selected collection to the requested date range and area of interest.
date_filter = ee.Filter.date(start_date, end_date)
bounds_filter = ee.Filter.bounds(geometry)
filtered = data.filter(date_filter).filter(bounds_filter)

# Sentinel and Landsat 8 are additionally limited by their cloud-cover property;
# MODIS is left unfiltered.
if mission_name in ('sentinal', 'landsat8'):
    filtered = filtered.filter(ee.Filter.lt(cloud_coverage, percentage_coludcover))

# Per-image step selected by `function`.
# NOTE(review): 'pass' still maps dataformat (divide by 10000) over every image -
# confirm this is intended for the Landsat RAW and MODIS collections.
per_image_steps = {
    'cloudmask': maskS2clouds,
    'scalefactor': applyScaleFactors,
    'pass': dataformat,
}
per_image_step = per_image_steps.get(function)
if per_image_step is not None:
    filtered = filtered.map(per_image_step)

##### Download Result

In [None]:
# Fetch the 'system:index' of every image left after filtering and report how many there are.
index_list = filtered.aggregate_array('system:index')
image_ids = index_list.getInfo()
print('Total images: ', len(image_ids))

In [None]:
# Download the whole filtered image collection into out_dir, clipped to the roi.
# The scale is only passed on when out_scale is not 'default'.
# (For a single image, geemap.download_ee_image(image, filename) can be used instead.)
download_options = {'region': roi}
if out_scale != 'default':
    download_options['scale'] = out_scale

geemap.download_ee_image_collection(filtered, out_dir, **download_options)

In [None]:
### Export to s3
# Upload everything in out_dir to the configured bucket/path when upload is enabled.
if upload:
    s3_upload(bucket_name, out_dir, drive_path)

# Remove the local download folder (raw imagery) when remove is enabled.
if remove:
    shutil.rmtree(out_dir)

In [None]:
# Report the wall-clock time elapsed since the timer was started in the first cell.
end = time.time()
end_data_acquistion = time.time()
elapsed_seconds = end_data_acquistion - start_data_acquistion
print("The time of execution of data acquistion script:", elapsed_seconds)