In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

import time
from os import listdir
from os.path import join


from utils_modis import extract_MODIS_chunk

In [2]:
gdf = gpd.read_file('../data_v02/gadm41_USA_1.json')
states = ['Minnesota', 'Iowa', 'Wisconsin', 'NorthDakota', 'SouthDakota', 'Nebraska', 'Illinois', 'Indiana', 'Missouri', 'Michigan', 'Kansas']
gdf = gdf[gdf.NAME_1.isin(states)].reset_index(drop=True)
bbox = np.round(gdf.union_all().bounds)

In [7]:
step = 0.0045
lons = np.arange(bbox[0], bbox[2], step)
lats = np.arange(bbox[1], bbox[3], step)

lon_grid, lat_grid = np.meshgrid(lons, lats)

In [11]:
def process_MODIS_data(grid, batch_size=500):
    all_features = []
    all_dates = []
    
    for i in range(0, len(grid), batch_size):
        chunk = grid.iloc[i:i+batch_size]

        try:
            t = time.time()
            chunk_features, chunk_dates = extract_MODIS_chunk(chunk)
            all_features.append(chunk_features)
            all_dates.extend(chunk_dates)
            
            print(f'Processed {i + len(chunk_features)}/{len(grid)} locations | Time: {round(time.time() - t, 3)}')
        
        except Exception as e:
            print(f"Error processing chunk {i}: {e}")
    
    df = pd.concat(all_features, axis=0).reset_index(drop=True)
    df = pd.concat([df, grid], axis=1)
    
    return df

In [12]:
dates = pd.date_range('2024-01-01', '2024-01-31', freq='1D')
dfs = []
for date in dates:
    mesh_df = pd.DataFrame({
    'lon': lon_grid.ravel(),
    'lat': lat_grid.ravel(),
    'date': [date]*len(lat_grid.ravel())
    })
    
    df = process_MODIS_data(mesh_df, batch_size=5000)

    dfs.append(df)

(5000, 12)
Processed 5000/14124321 locations | Time: 35.336


  df['ARVI'] = (mod_features[:,1] + mod_features[:,2] - 2*mod_features[:,0])/(mod_features[:,1] + mod_features[:,2] + 2*mod_features[:,0])


(5000, 12)
Processed 10000/14124321 locations | Time: 198.43


  df['ARVI'] = (mod_features[:,1] + mod_features[:,2] - 2*mod_features[:,0])/(mod_features[:,1] + mod_features[:,2] + 2*mod_features[:,0])


(5000, 12)
Processed 15000/14124321 locations | Time: 215.663
(5000, 12)
Processed 20000/14124321 locations | Time: 276.591
(5000, 12)
Processed 25000/14124321 locations | Time: 218.024
(5000, 12)
Processed 30000/14124321 locations | Time: 195.042
(5000, 12)
Processed 35000/14124321 locations | Time: 196.496
(5000, 12)
Processed 40000/14124321 locations | Time: 232.996
(5000, 12)
Processed 45000/14124321 locations | Time: 200.298
(5000, 12)
Processed 50000/14124321 locations | Time: 210.538
(5000, 12)
Processed 55000/14124321 locations | Time: 268.301
(5000, 12)
Processed 60000/14124321 locations | Time: 209.665
(5000, 12)
Processed 65000/14124321 locations | Time: 179.923
(5000, 12)
Processed 70000/14124321 locations | Time: 182.097
(5000, 12)
Processed 75000/14124321 locations | Time: 161.791
(5000, 12)
Processed 80000/14124321 locations | Time: 209.811
(5000, 12)
Processed 85000/14124321 locations | Time: 169.848
(5000, 12)
Processed 90000/14124321 locations | Time: 207.698
(5000, 1

KeyboardInterrupt: 

In [22]:
pd.concat(dfs)

Unnamed: 0,temperature_2m,dewpoint_temperature_2m,u_component_of_wind_10m,v_component_of_wind_10m,surface_net_solar_radiation_sum,total_evaporation_sum,surface_pressure,total_precipitation_sum,LE_PM,temperature_2m_rol_30,...,total_precipitation_sum_max,total_precipitation_sum_std,LE_PM_rol_30,LE_PM_rol_7,LE_PM_min,LE_PM_max,LE_PM_std,lon,lat,date
0,271.030700,265.337547,-1.003529,2.199015,7780204.0,-0.000302,82727.890462,0.000051,238.122190,276.975735,...,0.002734,0.000502,361.942956,366.135450,238.122190,449.457572,54.613621,-104.0,36.0,2025-01-01
1,271.198506,265.571271,-0.838510,2.061117,7431132.0,-0.000308,83415.057129,0.000059,233.680539,277.346428,...,0.002527,0.000462,362.626852,372.382890,233.680539,450.239771,56.597541,-103.9,36.0,2025-01-01
2,271.378600,265.797427,-0.647633,1.891581,7192578.0,-0.000297,84030.223796,0.000071,233.507974,277.707520,...,0.002325,0.000424,369.231631,382.575353,233.507974,459.573478,59.249689,-103.8,36.0,2025-01-01
3,271.561380,266.012922,-0.466542,1.730001,6930964.0,-0.000278,84453.640462,0.000081,232.427237,277.998262,...,0.002124,0.000387,374.755391,389.374683,232.427237,467.000228,61.748631,-103.7,36.0,2025-01-01
4,271.730000,266.172671,-0.408701,1.676839,6589044.0,-0.000262,84754.973796,0.000074,224.736227,278.160642,...,0.001920,0.000350,372.571730,388.678064,224.736227,468.824721,64.538436,-103.6,36.0,2025-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,278.187438,271.380435,1.927623,-2.302165,7424524.0,-0.000709,101199.682129,0.000000,268.525339,280.793021,...,0.038038,0.010031,254.896875,251.481672,38.766714,465.256700,101.677584,-89.4,36.0,2025-01-01
147,278.208516,271.336571,1.906546,-2.223307,7183488.0,-0.000595,101150.265462,0.000000,263.087926,280.790802,...,0.037351,0.010138,252.850878,249.518624,38.293814,462.473903,100.733501,-89.3,36.0,2025-01-01
148,278.247171,271.350487,1.908967,-2.183940,7084038.0,-0.000617,101165.973796,0.000000,261.037923,280.802556,...,0.037294,0.010307,252.607265,250.176904,39.061479,463.695114,100.824968,-89.2,36.0,2025-01-01
149,278.259704,271.326968,1.911815,-2.144796,7008910.0,-0.000618,101113.807129,0.000000,259.576277,280.795820,...,0.037242,0.010497,252.867330,251.098902,40.066988,466.673890,101.426876,-89.1,36.0,2025-01-01
