# Deriving ERA5 Data from GEE

This notebook slices chunks of ERA5 data on Google Earth Engine (GEE) and exports them as .tif files to Google Drive.

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd

import time
from datetime import datetime, timedelta
from os import listdir
from os.path import join

from src.utils_era import extract_era5_chunk, bands

In [2]:
# States that make up the study area. 'NorthDakota' / 'SouthDakota' are written
# without a space -- presumably to match the NAME_1 values in the GADM file;
# verify against the data if a state goes missing after filtering.
states = [
    'Minnesota',
    'Iowa',
    'Wisconsin',
    'NorthDakota',
    'SouthDakota',
    'Nebraska',
    'Illinois',
    'Indiana',
    'Missouri',
    'Michigan',
    'Kansas',
]

# Read the level-1 boundaries and keep only the rows for the selected states.
gdf = (
    gpd.read_file('../data_v02/gadm41_USA_1.json')
    .loc[lambda frame: frame.NAME_1.isin(states)]
    .reset_index(drop=True)
)

# Bounding box of the merged state geometries, rounded to whole units.
# NOTE(review): no visible cell consumes `bbox`, while `upper_midwest` (used by
# the export function) is never defined in the visible cells -- presumably it
# was built from `bbox` in a cell that is no longer present; confirm and restore.
merged_outline = gdf.union_all()
bbox = np.round(merged_outline.bounds)

In [79]:
def create_daily_export(date):
    """Build (but do not start) a Drive export task for one day of ERA5 data.

    The image collection is filtered to ``upper_midwest`` and to the single
    day starting at ``date``; the first matching image, restricted to
    ``bands``, is what gets exported.

    Parameters
    ----------
    date : datetime.datetime or pandas.Timestamp
        Day to export. Only the calendar date is used.

    Returns
    -------
    The task object returned by ``ee.batch.Export.image.toDrive``; the caller
    is responsible for calling ``.start()`` on it.

    Notes
    -----
    Relies on names that must already exist in the notebook namespace:
    ``ee`` (imported and initialised), ``upper_midwest`` (export region) and
    ``bands`` (band names to keep).
    NOTE(review): neither ``import ee`` nor a definition of ``upper_midwest``
    appears in the visible cells -- confirm they are set up before this runs.
    """
    ee_date = ee.Date(date.strftime('%Y-%m-%d'))

    # NOTE(review): confirm this collection ID against the Earth Engine data
    # catalog (the daily ERA5-Land product may be published under another ID).
    daily_era5 = (ee.ImageCollection('ECMWF/ERA5_LAND/DAILY')
        .filterBounds(upper_midwest)
        .filterDate(ee_date, ee_date.advance(1, 'day'))
    )

    daily_image = daily_era5.first().select(bands)

    # tm_yday equals pandas' day_of_year and also works for plain datetimes.
    day_of_year = date.timetuple().tm_yday

    # Export the band-selected image for *this* day. The previous code passed
    # `filtered_era5.first()`, a name that is not defined in this notebook.
    task = ee.batch.Export.image.toDrive(
        image=daily_image,
        description=f'ERA5_{date.year}_{day_of_year}',
        scale=11132,
        region=upper_midwest,
        maxPixels=1e10,
        folder=None,
    )

    return task

In [80]:
start_date = pd.to_datetime('2018-12-01')
end_date = pd.to_datetime('2023-12-31')

# Upper bound on export tasks tracked as in flight at any one time.
MAX_ACTIVE_TASKS = 10
# Seconds to wait between status polls while the queue is full.
POLL_SECONDS = 10
# Task states after which a task no longer occupies a slot. 'CANCELLED' is
# included so that cancelled tasks do not block the queue forever.
FINISHED_STATES = ('COMPLETED', 'FAILED', 'CANCELLED')

current_date = start_date
tasks = []          # tasks submitted and not yet seen in a finished state
failed_tasks = []   # status dicts of tasks that finished without completing
t = time.time()
while current_date <= end_date:
    task = create_daily_export(current_date)
    task.start()
    tasks.append(task)

    # Block until there is room for another submission, instead of pruning
    # once and submitting regardless of how many tasks are still pending.
    while len(tasks) >= MAX_ACTIVE_TASKS:
        still_active = []
        for pending in tasks:
            status = pending.status()
            if status['state'] not in FINISHED_STATES:
                still_active.append(pending)
            elif status['state'] != 'COMPLETED':
                # Keep a record rather than silently dropping failures.
                failed_tasks.append(status)
        tasks = still_active
        if len(tasks) >= MAX_ACTIVE_TASKS:
            time.sleep(POLL_SECONDS)

    current_date += timedelta(days=1)

print(f"All tasks submitted! Time: {round((time.time()-t)/60,3)} min")
if failed_tasks:
    print(f"{len(failed_tasks)} task(s) failed or were cancelled; see `failed_tasks`.")

All tasks submitted! Time: 138.91 min
