In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

import time
from datetime import datetime, timedelta
from os import listdir
from os.path import join

import ee
from utils_modis import extract_MODIS_chunk, bands, mod_names

In [2]:
# States that make up the study region; entries are compared verbatim with the
# NAME_1 column of the GADM level-1 file.
states = [
    'Minnesota', 'Iowa', 'Wisconsin', 'NorthDakota', 'SouthDakota', 'Nebraska',
    'Illinois', 'Indiana', 'Missouri', 'Michigan', 'Kansas',
]

gdf = gpd.read_file('../data_v02/gadm41_USA_1.json')
in_region = gdf['NAME_1'].isin(states)
gdf = gdf.loc[in_region].reset_index(drop=True)

# Bounds of the merged state polygons, rounded to whole numbers.
# NOTE(review): np.round rounds to the nearest integer, so an edge can move
# inward and clip a thin strip of the region -- confirm this is acceptable.
bbox = np.round(gdf.union_all().bounds)

# Rectangle used both to filter the image collection and as the export region.
upper_midwest = ee.Geometry.Rectangle(list(bbox))

In [3]:
def mask(image):
    """Replace the QA band of ``image`` with a binary cloud/shadow/cirrus flag.

    The QA band is ``bands[-1]``. The returned image keeps ``bands[:-1]`` and
    gains one Int16 band named ``mod_names[-1]`` that is 1 where any of QA
    bits 0, 1 or 2 is set and 0 elsewhere.

    Args:
        image: Earth Engine image containing every band listed in ``bands``.

    Returns:
        The image restricted to ``bands[:-1]`` plus the flag band.
    """
    qa = image.select(bands[-1])

    def bit_is_set(bit):
        # bitwiseAnd with (1 << bit) yields 0 or 2**bit, never 1 for bit > 0,
        # so the result has to be compared against 0 rather than 1.
        return qa.bitwiseAnd(1 << bit).neq(0)

    # NOTE(review): bit positions 0/1/2 are kept from the original code --
    # confirm them against the documented bit layout of the QA band in use.
    cloud = bit_is_set(0)
    shadow = bit_is_set(1)
    cirrus = bit_is_set(2)

    # Cloud or shadow or cirrus = 1, else 0
    cloud_mask = cloud.Or(shadow).Or(cirrus).rename(mod_names[-1]).toInt16()

    return image.select(bands[:-1]).addBands(cloud_mask)
    
def create_daily_export(date):
    """Build a Google Drive export task for one day of MOD09GA imagery.

    The task is only created here; the caller is responsible for starting it.

    Args:
        date: Day to export. Must provide ``strftime``, ``year`` and
            ``day_of_year`` (e.g. a ``pandas.Timestamp``).

    Returns:
        The export task for the first image of that day intersecting
        ``upper_midwest``, limited to ``bands[:-1]`` (the last entry of
        ``bands`` is left out, and ``mask`` is not applied).
    """
    day_start = ee.Date(date.strftime('%Y-%m-%d'))
    day_end = day_start.advance(1, 'day')

    collection = ee.ImageCollection('MODIS/061/MOD09GA')
    collection = collection.filterBounds(upper_midwest)
    collection = collection.filterDate(day_start, day_end)

    daily_image = collection.first().select(bands[:-1])

    export_options = dict(
        image=daily_image,
        description=f'MODIS_{date.year}_{date.day_of_year}',
        scale=500,
        region=upper_midwest,
        maxPixels=1e10,
        folder=None,
    )
    return ee.batch.Export.image.toDrive(**export_options)

In [5]:
# Upper bound on export tasks that are submitted but not yet finished.
MAX_ACTIVE_TASKS = 10
# Seconds to wait between status polls while the cap is reached.
POLL_SECONDS = 10
# Task states after which a task will never run again.
TERMINAL_STATES = {'COMPLETED', 'FAILED', 'CANCELLED'}

start_date = pd.to_datetime('2021-01-01')
end_date = pd.to_datetime('2021-12-31')

current_date = start_date
tasks = []
t = time.time()

# Download MODIS without the cloud band: one export task per day, inclusive of
# both start_date and end_date.
while current_date <= end_date:
    # Wait until a slot is free. The previous single `if` pruned the list once
    # and then submitted anyway, so the number of in-flight tasks (and the
    # number of status calls per iteration) kept growing.
    while len(tasks) >= MAX_ACTIVE_TASKS:
        tasks = [active for active in tasks
                 if active.status()['state'] not in TERMINAL_STATES]
        if len(tasks) >= MAX_ACTIVE_TASKS:
            time.sleep(POLL_SECONDS)

    task = create_daily_export(current_date)
    task.start()
    tasks.append(task)

    current_date += timedelta(days=1)

# Up to MAX_ACTIVE_TASKS exports may still be running at this point.
print(f"All tasks submitted! Time: {round((time.time()-t)/60,3)} min")

All tasks submitted! Time: 190.029 min


# Old pipeline

In [3]:
# Spacing between neighbouring grid points, in the same units as bbox.
step = 0.0045

# bbox[0]/bbox[2] bound the longitudes and bbox[1]/bbox[3] the latitudes;
# np.arange excludes the upper bound of each axis.
lon_min, lat_min, lon_max, lat_max = bbox
lons = np.arange(lon_min, lon_max, step)
lats = np.arange(lat_min, lat_max, step)

# 2-D coordinate arrays covering every (lat, lon) combination.
lon_grid, lat_grid = np.meshgrid(lons, lats)

In [4]:
def process_MODIS_data(grid, batch_size=500):
    """Extract MODIS features for every row of ``grid`` in fixed-size chunks.

    Each chunk is passed to ``extract_MODIS_chunk``, which is expected to
    return ``(features, dates)`` where ``features`` is a pandas object with
    one row per location. Only ``features`` is used here.

    Feature rows are labelled with their position in ``grid`` before being
    merged, so a chunk that fails (or comes back with fewer rows than were
    requested) leaves missing values in place instead of shifting every
    later row onto the wrong location.

    Args:
        grid: DataFrame with one row per location to sample.
        batch_size: Number of rows sent to ``extract_MODIS_chunk`` per call.

    Returns:
        DataFrame with the feature columns followed by the columns of
        ``grid``, one row per row of ``grid`` and carrying ``grid``'s index.
        Rows whose chunk failed have missing feature values. An empty
        ``grid`` yields an empty copy of ``grid``.

    Raises:
        ValueError: If ``grid`` is non-empty and no chunk could be processed.
    """
    n_locations = len(grid)
    all_features = []

    for i in range(0, n_locations, batch_size):
        chunk = grid.iloc[i:i+batch_size]

        try:
            t = time.time()
            chunk_features, _ = extract_MODIS_chunk(chunk)
            n_rows = len(chunk_features)

            if n_rows > len(chunk):
                # More rows than locations cannot be placed; report via the handler below.
                raise ValueError(f'got {n_rows} rows for {len(chunk)} locations')
            if n_rows < len(chunk):
                # Which locations were dropped is unknown here, so the rows are
                # attached to the leading locations of this chunk (as before),
                # but later chunks are no longer shifted.
                print(f'Warning: chunk {i} returned {n_rows} rows for {len(chunk)} locations; '
                      f'rows within this chunk may be misaligned')

            # Label each feature row with its position in `grid`.
            positions = pd.RangeIndex(i, i + n_rows)
            all_features.append(chunk_features.set_axis(positions, axis=0))

            print(f'Processed {i + n_rows}/{n_locations} locations | Time: {round(time.time() - t, 3)}')

        except Exception as e:
            # Best effort: a failed chunk is reported and left as missing values.
            print(f"Error processing chunk {i}: {e}")

    if not all_features:
        if n_locations == 0:
            return grid.copy()
        raise ValueError('No MODIS features were extracted: every chunk failed')

    # Expand to one row per grid position (missing where a chunk failed), then
    # adopt grid's own index so the column-wise concat pairs rows one-to-one.
    features = pd.concat(all_features, axis=0).reindex(pd.RangeIndex(n_locations))
    features.index = grid.index

    return pd.concat([features, grid], axis=1)

In [5]:
# Calendar year to download; used to build the daily date ranges and the
# ./data/MODIS/{year}_{month}.csv output file names.
year = 2024

In [6]:
# Flatten the 2-D coordinate grids so that each grid point becomes one row.
flat_coords = {
    'lon': lon_grid.ravel(),
    'lat': lat_grid.ravel(),
}
mesh_df = pd.DataFrame(flat_coords)

In [7]:
# Download every day of `year`, writing one CSV per month.
for month in range(1, 13):
    # Zero-padded month, used in the output file name and the progress message.
    m = f'{month:02d}'

    # Build the range from the real month length: a hard-coded day 31 is not a
    # valid date for February, April, June, September or November.
    month_start = pd.Timestamp(year=year, month=month, day=1)
    dates = pd.date_range(month_start, periods=month_start.days_in_month, freq='1D')

    dfs = []
    for date in dates:
        # Scalar assignment broadcasts to every row without building a Python list.
        mesh_df['date'] = date
        df = process_MODIS_data(mesh_df, batch_size=5000)
        dfs.append(df)

    df = pd.concat(dfs)
    df.to_csv(f'./data/MODIS/{year}_{m}.csv', index=False)
    print(f'{year}-{m} is saved')

Processed 5000/14124321 locations | Time: 77.073
Processed 10000/14124321 locations | Time: 145.698
Processed 15000/14124321 locations | Time: 152.788
Processed 20000/14124321 locations | Time: 149.541
Processed 25000/14124321 locations | Time: 138.462
Processed 30000/14124321 locations | Time: 129.443
Processed 35000/14124321 locations | Time: 151.047
Processed 40000/14124321 locations | Time: 138.009
Processed 45000/14124321 locations | Time: 147.625
Processed 50000/14124321 locations | Time: 150.409
Processed 55000/14124321 locations | Time: 140.036
Processed 60000/14124321 locations | Time: 143.345
Processed 65000/14124321 locations | Time: 145.045
Processed 70000/14124321 locations | Time: 162.21
Processed 75000/14124321 locations | Time: 128.728
Processed 80000/14124321 locations | Time: 159.343
Processed 85000/14124321 locations | Time: 141.222
Error processing chunk 85000: Computation timed out.
Processed 95000/14124321 locations | Time: 135.259
Processed 100000/14124321 locati

  df['GNDVI'] = (mod_features[:,1] - mod_features[:,3])/(mod_features[:,1] + mod_features[:,3])


Processed 7814997/14124321 locations | Time: 149.112


  df['GNDVI'] = (mod_features[:,1] - mod_features[:,3])/(mod_features[:,1] + mod_features[:,3])


Processed 7819998/14124321 locations | Time: 148.415
Processed 7825000/14124321 locations | Time: 160.473
Processed 7830000/14124321 locations | Time: 133.499
Processed 7834998/14124321 locations | Time: 136.515
Processed 7840000/14124321 locations | Time: 149.534
Processed 7845000/14124321 locations | Time: 147.578
Processed 7850000/14124321 locations | Time: 134.705
Processed 7854998/14124321 locations | Time: 133.874
Processed 7859995/14124321 locations | Time: 121.644
Processed 7864996/14124321 locations | Time: 128.896
Processed 7869997/14124321 locations | Time: 151.67
Processed 7874992/14124321 locations | Time: 134.257
Processed 7879996/14124321 locations | Time: 153.788
Processed 7884999/14124321 locations | Time: 154.766
Processed 7890000/14124321 locations | Time: 157.319
Processed 7895000/14124321 locations | Time: 137.4
Processed 7900000/14124321 locations | Time: 174.595
Processed 7904999/14124321 locations | Time: 144.928
Processed 7910000/14124321 locations | Time: 153.

KeyboardInterrupt: 