In [None]:
# %pip install geedim geemap earthengine-api python-dotenv

# Import Packages

In [None]:
import os
import ee
import json
import geemap
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv

# Set-up Environment

In [None]:
# Load the variables defined in the local .env file into the process environment.
load_dotenv()

# Handed to ee.Initialize as-is, so it is deliberately not validated here.
EE_PROJECT_NAME = os.getenv("EE_PROJECT_NAME")

GEEE_STORAGE_DIR = os.getenv("GEEE_STORAGE_DIR")
CACHE_STORAGE_DIR = os.getenv("CACHE_STORAGE_DIR")

LANDSAT8_START_DATE = os.getenv("LANDSAT8_START_DATE")
LANDSAT8_DATA_STORAGE_DIR = os.getenv("LANDSAT8_DATA_STORAGE_DIR")

# Fail fast: os.getenv returns None for an unset variable, which would
# otherwise be formatted into the paths below as the literal text "None" and
# only surface later as a confusing file-not-found or parsing error.
_missing_env_vars = [
    name
    for name, value in (
        ("GEEE_STORAGE_DIR", GEEE_STORAGE_DIR),
        ("CACHE_STORAGE_DIR", CACHE_STORAGE_DIR),
        ("LANDSAT8_START_DATE", LANDSAT8_START_DATE),
        ("LANDSAT8_DATA_STORAGE_DIR", LANDSAT8_DATA_STORAGE_DIR),
    )
    if not value
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in the .env file or in the shell environment."
    )

# CSV listing the ECCC stations whose hourly data was already downloaded.
ECCC_CACHE_HOURLY_DOWNLOADED_DATA_LIST_FILE = f"{CACHE_STORAGE_DIR}{os.sep}eccc_hourly_downloaded_data_no_dupe_loc.csv"

# Folder that receives the downloaded GeoTIFFs and their JSON metadata.
DATA_STORAGE_DIR = f"{GEEE_STORAGE_DIR}{os.sep}{LANDSAT8_DATA_STORAGE_DIR}"

In [None]:
# --- Image search ---
# When True the collection is filtered by the buffered region around a station,
# otherwise only by the station point itself.
IS_REGION_DOWNLOAD = False
# Upper bound used in the 'CLOUD_COVER_LAND' less-than metadata filter.
CLOUD_COVER_MAX = 90

# --- Image export ---
# Buffer distance applied around each station point (metres, per the name);
# the bounds of the buffered point become the download region.
REGION_BUFFER_IN_M = 100 * 1_000
# Value passed as `scale` (resolution) to the download call.
SPATIAL_RESOLUTION = 30
# Value passed as `crs` (projection) to the download call.
CRS = "EPSG:3979"

In [None]:
# Earth Engine catalog ID of the image collection that is searched and downloaded.
# NOTE(review): the constant is named "surface reflectance", but
# "LANDSAT/LC08/C02/T1" looks like the Tier 1 raw-scene collection (surface
# reflectance is normally published as "LANDSAT/LC08/C02/T1_L2") — confirm
# which product is intended before relying on the name.
LANDSAT8_SURFACE_REFLECTANCE_COLLECTION_NAME = "LANDSAT/LC08/C02/T1"

In [None]:
# Landsat 8 band identifiers, in order:
#   B1  - ultra blue, coastal aerosol
#   B2  - blue
#   B3  - green
#   B4  - red
#   B5  - near infrared
#   B6  - shortwave infrared 1
#   B7  - shortwave infrared 2
#   B8  - panchromatic, used for sharpening images
#   B9  - cirrus (cloud cover mapping)
#   B10 - TIR 1
#   B11 - TIR 2
LANDSAT8_SURFACE_REFLECTANCE_BAND_NAME = [
    f"B{band_number}" for band_number in range(1, 12)
]

# API Authentication

You will need a Google account with Google Earth Engine (GEE) enabled. GEE is free!

In [None]:
# Authenticate this session with Google Earth Engine.
# NOTE(review): presumably prompts for a Google sign-in when no stored
# credentials exist — confirm against the earthengine-api documentation.
ee.Authenticate()

In [None]:
# Start the Earth Engine client against the project read from the environment.
ee.Initialize(project=EE_PROJECT_NAME)

# Data Loading

In [None]:
# Station list that drives the download loop; the loop reads the columns
# 'Latitude', 'Longitude', 'Climate ID', 'HLY First Year' and 'HLY Last Year'.
eccc_data_exits_df = pd.read_csv(filepath_or_buffer=ECCC_CACHE_HOURLY_DOWNLOADED_DATA_LIST_FILE)

# Bare last expression so the notebook renders the frame.
eccc_data_exits_df

# Data Download

In [None]:
def _station_search_window(first_year, last_year, landsat_start, today):
    """Build the image search window for one station as ISO date strings.

    Args:
        first_year: First year with hourly station records (anything int() accepts).
        last_year: Last year with hourly station records (anything int() accepts).
        landsat_start: datetime of the earliest date imagery is requested from.
        today: datetime used to cap the window so it never reaches into the future.

    Returns:
        Tuple (start_date, end_date), both formatted as 'YYYY-MM-DD'. The end
        is intended as an exclusive bound: 1 January of the year after
        `last_year`, or `today` when that would lie in the future.
    """
    window_start = max(datetime(int(first_year), 1, 1), landsat_start)
    # The date filter excludes its end date, so step to 1 January of the
    # following year; stopping at 1 January of `last_year` itself would drop
    # the station's entire final year of records.
    window_end = min(datetime(int(last_year) + 1, 1, 1), today)
    return window_start.strftime('%Y-%m-%d'), window_end.strftime('%Y-%m-%d')


# (longitude, latitude) pairs already handled in this run, so stations that
# share a location are only downloaded once.
long_lat_download_cache = []

present_date = datetime.now()

landsat8_start_date = datetime.strptime(LANDSAT8_START_DATE, '%Y-%m-%d')

# Running total of images matched across all processed stations.
dataset_size = 0

# Make sure the output folder exists before any file is written into it.
os.makedirs(DATA_STORAGE_DIR, exist_ok=True)

for station_index, station in eccc_data_exits_df.iterrows():
    # get station data
    latitude = station['Latitude']
    longitude = station['Longitude']
    climate_id = station['Climate ID']
    station_end_year = station['HLY Last Year']
    station_start_year = station['HLY First Year']
    print(f"Started process for Climate ID: {climate_id}...")

    # build lat long
    long_lat = (longitude, latitude)

    # build gee search bounds
    point = ee.Geometry.Point(list(long_lat))
    region = point.buffer(REGION_BUFFER_IN_M).bounds()

    # get appropriate search dates
    start_date, end_date = _station_search_window(
        station_start_year,
        station_end_year,
        landsat8_start_date,
        present_date,
    )
    print(f"Time filter: {start_date} - {end_date}")

    # Zero-padded ISO dates sort chronologically as plain strings. `>=` also
    # rejects an empty window (start == end), which cannot match any image.
    if start_date >= end_date:
        print(f">>> Time of station data recording not present in {LANDSAT8_SURFACE_REFLECTANCE_COLLECTION_NAME} !!!")
        continue

    # download only if not in cache of downloaded location
    if long_lat in long_lat_download_cache:
        print(f"Found cache for location: {long_lat} (long/lat)")
        continue

    # get collection
    collection = (
        ee.ImageCollection(LANDSAT8_SURFACE_REFLECTANCE_COLLECTION_NAME)
        .filterBounds(region if IS_REGION_DOWNLOAD else point)
        .filterDate(start_date, end_date)
        .filterMetadata('CLOUD_COVER_LAND', 'less_than', CLOUD_COVER_MAX)
    )

    collection_size = collection.size().getInfo()
    print(f"Collection size: {collection_size}")

    dataset_size += collection_size

    image_list = collection.toList(collection_size)

    # download collection data
    for image_index in range(collection_size):
        # get image and its metadata (one request per image)
        image = ee.Image(image_list.get(image_index))
        image_metadata = image.getInfo()

        # asset id with '/' replaced so it can be used inside a file name
        image_id = image_metadata['id'].replace('/', '_')

        # acquisition start time exactly as stored in the image properties
        image_time = image_metadata['properties']['system:time_start']

        # build file name
        file_name = f'{image_id}_{climate_id}_{REGION_BUFFER_IN_M}_{image_time}'
        file_name_path_no_extension = f"{DATA_STORAGE_DIR}{os.sep}{file_name}"
        tif_path = f"{file_name_path_no_extension}.tif"
        metadata_path = f"{file_name_path_no_extension}.json"

        tif_exists = os.path.isfile(tif_path)
        metadata_exists = os.path.isfile(metadata_path)

        # skip if the data exists
        if tif_exists and metadata_exists:
            print(f">>> ({image_index+1}/{collection_size}) TIF and Metadata found!")
            continue

        print(f"Downloading: ({image_index+1}/{collection_size})")
        # download if tif is not there
        if tif_exists:
            print(">>> TIF found!")
        else:
            geemap.download_ee_image(
                image=image,
                filename=tif_path,
                region=region,
                scale=SPATIAL_RESOLUTION,  # resolution
                crs=CRS,  # projection of landcover
            )

        # write metadata if it does not exist yet
        if metadata_exists:
            print(">>> Metadata found!")
        else:
            with open(metadata_path, "w") as metadata_file:
                json.dump(image_metadata, metadata_file)

    # remember the location only once it has been processed
    long_lat_download_cache.append(long_lat)
