In [70]:
import numpy as np
import pandas as pd
import geopandas as gpd

import time
from datetime import datetime, timedelta
from os import listdir
from os.path import join

import geetools

from utils_era import extract_era5_chunk, bands

In [2]:
# Study area: the Upper-Midwest states, spelled the way they are matched
# against the NAME_1 column of the boundary file.
states = [
    'Minnesota', 'Iowa', 'Wisconsin', 'NorthDakota', 'SouthDakota', 'Nebraska',
    'Illinois', 'Indiana', 'Missouri', 'Michigan', 'Kansas',
]

# Read the level-1 boundaries and keep only the rows for the selected states.
gdf = (
    gpd.read_file('../data_v02/gadm41_USA_1.json')
    .loc[lambda frame: frame['NAME_1'].isin(states)]
    .reset_index(drop=True)
)

# Bounds of the merged state geometries, rounded to whole numbers.
bbox = np.round(gdf.union_all().bounds)

In [79]:
def create_daily_export(date):
    """Build (but do not start) a Google Drive export task for one day of ERA5-Land.

    Parameters
    ----------
    date : pandas.Timestamp
        Day to export. Must provide ``strftime``, ``year`` and ``day_of_year``.

    Returns
    -------
    The task object returned by ``ee.batch.Export.image.toDrive``. The caller
    is responsible for calling ``.start()`` on it.

    Notes
    -----
    Relies on the notebook-level names ``ee``, ``upper_midwest`` and ``bands``.
    NOTE(review): neither ``import ee`` nor a definition of ``upper_midwest``
    appears in the visible cells -- confirm both exist before running on a
    fresh kernel.
    NOTE(review): verify that 'ECMWF/ERA5_LAND/DAILY' is a valid catalog id for
    the daily aggregates expected by ``bands``.
    """
    ee_date = ee.Date(date.strftime('%Y-%m-%d'))

    # Restrict the collection to the study region and to the single day
    # [date, date + 1 day).
    daily_era5 = (ee.ImageCollection('ECMWF/ERA5_LAND/DAILY')
        .filterBounds(upper_midwest)
        .filterDate(ee_date, ee_date.advance(1, 'day'))
    )

    # Keep only the bands of interest from that day's image.
    daily_image = daily_era5.first().select(bands)

    # Export the band-selected image built above. The previous code exported
    # `filtered_era5.first()`, a name that is never defined in this notebook.
    task = ee.batch.Export.image.toDrive(
        image=daily_image,
        description=f'ERA5_{date.year}_{date.day_of_year}',
        scale=11132,
        region=upper_midwest,
        maxPixels=1e10,
        folder=None,
    )

    return task

In [80]:
start_date = pd.to_datetime('2018-12-01')
end_date = pd.to_datetime('2023-12-31')

# Throttling parameters for the export queue.
MAX_ACTIVE_TASKS = 10      # upper bound on exports in flight at any time
POLL_SECONDS = 10          # pause between status polls while the queue is full
# States after which a task will make no further progress. CANCELLED is
# included so that a cancelled task cannot block the queue forever.
FINISHED_STATES = ('COMPLETED', 'FAILED', 'CANCELLED')

current_date = start_date
tasks = []          # exports that have been started and are not yet finished
failed_tasks = []   # status dicts of exports that ended without completing
t = time.time()
while current_date <= end_date:
    task = create_daily_export(current_date)
    task.start()
    tasks.append(task)

    # Block until the number of in-flight exports drops below the limit, so the
    # queue is genuinely bounded instead of being polled once per submission.
    while len(tasks) >= MAX_ACTIVE_TASKS:
        time.sleep(POLL_SECONDS)
        still_active = []
        for pending in tasks:
            status = pending.status()
            if status['state'] not in FINISHED_STATES:
                still_active.append(pending)
            elif status['state'] != 'COMPLETED':
                # Keep a record of unsuccessful exports rather than silently
                # dropping them.
                failed_tasks.append(status)
        tasks = still_active

    current_date += timedelta(days=1)

print(f"All tasks submitted! Time: {round((time.time()-t)/60,3)} min")
if failed_tasks:
    print(f"{len(failed_tasks)} export(s) did not complete; see `failed_tasks`")

All tasks submitted! Time: 138.91 min


# Old pipeline

In [3]:
# Grid spacing; 0.0045 is kept here as the previously used alternative value.
step = 0.1  # 0.0045

# Axes run from the first to the second bound of each dimension of `bbox`
# (indices 0/2 for longitude, 1/3 for latitude); the upper bound is excluded.
lons = np.arange(start=bbox[0], stop=bbox[2], step=step)
lats = np.arange(start=bbox[1], stop=bbox[3], step=step)

# 2-D coordinate matrices: rows follow `lats`, columns follow `lons`.
lon_grid, lat_grid = np.meshgrid(lons, lats, indexing='xy')

In [4]:
def process_era5_data(grid, batch_size=500):
    """Extract ERA5 features for every row of ``grid``, working in batches.

    Parameters
    ----------
    grid : pandas.DataFrame
        One row per location to process. It is sliced positionally and passed
        chunk by chunk to ``extract_era5_chunk``.
    batch_size : int, default 500
        Number of rows sent to ``extract_era5_chunk`` per call. Must be >= 1.

    Returns
    -------
    pandas.DataFrame
        The concatenated per-chunk features followed by the columns of
        ``grid``, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.

    Notes
    -----
    Assumes ``extract_era5_chunk`` returns ``(features, dates)`` where
    ``features`` is a DataFrame with one row per input location, in input
    order -- TODO confirm against ``utils_era``.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    all_features = []

    for i in range(0, len(grid), batch_size):
        chunk = grid.iloc[i:i+batch_size]

        t = time.time()
        # The dates returned alongside the features are not used here.
        chunk_features, _chunk_dates = extract_era5_chunk(chunk)
        all_features.append(chunk_features)

        print(f'Processed {i + len(chunk_features)}/{len(grid)} locations | Time: {round(time.time() - t, 3)}')

    df = pd.concat(all_features, axis=0).reset_index(drop=True)
    # Column-wise concat aligns on the index, so the grid is put on the same
    # 0..n-1 index as the features. Otherwise a grid with a non-default index
    # would be joined to the wrong rows (or padded with NaNs).
    df = pd.concat([df, grid.reset_index(drop=True)], axis=1)

    return df

In [16]:
# Calendar year used by the monthly extraction loop below, both for the date
# ranges it builds and for the names of the CSV files it writes.
year = 2024

In [17]:
# Flatten the 2-D coordinate matrices into a long table with one row per
# grid node (longitude and latitude side by side).
mesh_df = pd.DataFrame(dict(lon=lon_grid.ravel(), lat=lat_grid.ravel()))

In [19]:
# For every month of `year`: extract features for each day on the full grid,
# stack the daily frames and write one CSV per month.
for month in range(1, 13):
    # Zero-padded month label, always a string (used in file names and logs).
    m = f'{month:02d}'

    # Build the range from the real month length. Hard-coding day 31 makes
    # pd.date_range fail for every month that has fewer than 31 days.
    month_start = pd.Timestamp(year=year, month=month, day=1)
    dates = pd.date_range(month_start, periods=month_start.days_in_month, freq='1D')

    dfs = []
    for date in dates:
        # assign() returns a new frame, so `mesh_df` itself is left unchanged
        # and the cell stays idempotent on re-run.
        df = process_era5_data(mesh_df.assign(date=date), batch_size=100)
        dfs.append(df)

    df = pd.concat(dfs)
    df.to_csv(f'./data/ERA5/{year}_{m}.csv', index=None)
    print(f'{year}-{m} is saved')

Processed 100/28600 locations | Time: 6.647
Processed 200/28600 locations | Time: 8.324
Processed 300/28600 locations | Time: 6.387
Processed 400/28600 locations | Time: 199.43
Processed 500/28600 locations | Time: 6.903
Processed 600/28600 locations | Time: 7.005
Processed 700/28600 locations | Time: 6.313
Processed 800/28600 locations | Time: 7.128
Processed 900/28600 locations | Time: 4.409
Processed 1000/28600 locations | Time: 9.224
Processed 1100/28600 locations | Time: 7.869
Processed 1200/28600 locations | Time: 9.86
Processed 1300/28600 locations | Time: 5.928
Processed 1400/28600 locations | Time: 9.658
Processed 1500/28600 locations | Time: 9.517
Processed 1600/28600 locations | Time: 6.349
Processed 1700/28600 locations | Time: 9.209
Processed 1800/28600 locations | Time: 8.872
Processed 1900/28600 locations | Time: 7.003
Processed 2000/28600 locations | Time: 7.337
Processed 2100/28600 locations | Time: 8.26
Processed 2200/28600 locations | Time: 6.089
Processed 2300/28600

EEException: Computation timed out.

In [20]:
# Display whatever frame is currently bound to `df`.
# NOTE(review): the monthly loop above aborted with an EEException, so this is
# presumably the result of the last successful process_era5_data call rather
# than a full monthly frame -- confirm before relying on it.
df

Unnamed: 0,temperature_2m,dewpoint_temperature_2m,u_component_of_wind_10m,v_component_of_wind_10m,surface_net_solar_radiation_sum,total_evaporation_sum,surface_pressure,total_precipitation_sum,LE_PM,temperature_2m_rol_30,...,total_precipitation_sum_max,total_precipitation_sum_std,LE_PM_rol_30,LE_PM_rol_7,LE_PM_min,LE_PM_max,LE_PM_std,lon,lat,date
0,272.718991,268.421642,1.235924,1.773605,9190336.0,-0.000289,82849.085612,8.523463e-07,306.659817,274.341245,...,0.029818,0.005599,252.738081,304.339199,22.980784,431.200143,124.360815,-104.0,36.0,2024-01-01
1,272.968421,268.637381,1.356876,1.745163,9173496.0,-0.000325,83525.960612,8.523463e-07,306.237811,275.011102,...,0.028682,0.005398,266.705963,311.659580,23.305977,433.323699,119.408702,-103.9,36.0,2024-01-01
2,273.266924,268.847098,1.497663,1.714279,9270848.0,-0.000364,84130.918945,8.523463e-07,309.906577,275.658359,...,0.027777,0.005238,286.184457,321.303989,30.104909,443.062158,113.192713,-103.8,36.0,2024-01-01
3,273.551022,269.020438,1.626610,1.691798,9329912.0,-0.000368,84545.960612,8.523463e-07,312.579851,276.251480,...,0.026802,0.005068,305.018516,328.788294,43.459486,451.152078,105.717827,-103.7,36.0,2024-01-01
4,273.667315,269.122814,1.619286,1.756210,9225034.0,-0.000350,84841.168945,8.523463e-07,308.714634,276.652050,...,0.024927,0.004743,314.746119,328.322899,47.258912,455.901376,100.419898,-103.6,36.0,2024-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28595,262.442542,257.818045,1.669884,1.067082,2995214.0,-0.000293,98463.002279,2.238154e-06,57.650474,268.088725,...,0.015370,0.003543,48.661075,41.449445,16.039296,85.690636,18.761918,-82.5,48.9,2024-01-01
28596,262.420000,257.727795,1.674259,1.045395,3031806.0,-0.000279,98480.543945,2.282858e-06,58.314169,268.047503,...,0.014754,0.003387,48.991906,41.630578,16.613418,85.238919,18.721014,-82.4,48.9,2024-01-01
28597,262.397621,257.621431,1.683963,1.021794,3102012.0,-0.000273,98508.543945,2.253055e-06,59.523871,268.048016,...,0.014042,0.003217,49.831923,42.314712,17.435022,85.192974,18.862838,-82.3,48.9,2024-01-01
28598,262.470781,257.580741,1.693810,0.998235,3128200.0,-0.000286,98581.418945,2.133846e-06,60.385095,268.082459,...,0.013330,0.003051,50.175087,42.559143,18.162847,85.030619,18.876073,-82.2,48.9,2024-01-01
