In [1]:
import io
from pathlib import Path

import pandas as pd
import requests

In [2]:
# GLAM table API (documentation: https://glam1.gsfc.nasa.gov/api/doc/about)
URL = "https://glam1.gsfc.nasa.gov/api/gettbl/v4"

# Query-string parameters shared by every request in this notebook.
# The region to fetch is not stored here: an "ids=<id>" term is appended
# whenever a query string is built from this dict.
params = {
    "version": "v11",
    "sat": "MOD",  # MODIS
    "layer": "NDVI",
    "mask": "NASS_2011-2016_corn",
    "shape": "ADM",  # admin-level shape
    # Each value is pasted verbatim after "<key>=", so joining on "&years="
    # expands into one "years=<year>" term per year (2013 through 2022).
    "years": "&years=".join(str(year) for year in range(2013, 2023)),
    "start_month": "4",
    "num_month": "7",
    "ts_type": "seasonal",
    "format": "csv",
}


In [3]:
# Request a single region (id 26246) first to inspect the response layout.
query = "&".join([f"{key}={val}" for key, val in params.items()] + ["ids=26246"])

# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(f"{URL}?{query}", timeout=120)
# Stop on an HTTP error instead of trying to parse an error body as CSV below.
response.raise_for_status()

# The response starts with a 14-line metadata header (printed below);
# the CSV table begins right after it.
with io.StringIO(response.content.decode("UTF-8")) as f:
    metadata = [next(f) for _ in range(14)]
    df = pd.read_csv(f)

for line in metadata:
    print(line, end="")

df.head()

GLAM Project
NASA/GSFC/GIMMS
USDA/FAS/IPAD
Created on 2022-12-04 20:36:35 UTC

DB VERSION,v11
SAT,Terra AM
PRODUCT,MODIS NDVI 8-day
MEAN,2001-2021
MASK,NASS_2011-2016_corn
SHAPE,ADM
ID(s),26246
MAX SAMPLE COUNT,718925



Unnamed: 0,ORDINAL DATE,START DATE,END DATE,SOURCE,SAMPLE VALUE,SAMPLE COUNT,MEAN VALUE,MEAN COUNT,ANOM VALUE,MIN VALUE,MAX VALUE
0,2013-097,2013-04-07,2013-04-14,STD,0.248,493398.0,0.255,718903,-0.007,0.224,0.286
1,2013-105,2013-04-15,2013-04-22,STD,0.258,577231.0,0.27,718903,-0.012,0.238,0.318
2,2013-113,2013-04-23,2013-04-30,STD,0.249,667823.0,0.284,718903,-0.035,0.242,0.312
3,2013-121,2013-05-01,2013-05-08,STD,0.295,558627.0,0.3,718903,-0.005,0.257,0.342
4,2013-129,2013-05-09,2013-05-16,STD,0.289,711026.0,0.315,718903,-0.026,0.289,0.348


In [5]:
# Number of distinct MEAN VALUE entries (a missing value counts as one entry).
df['MEAN VALUE'].nunique(dropna=False)

43

It appears that the columns MEAN VALUE & MEAN COUNT remain the same across all years.

This appears to correspond to the MEAN line plotted in the GLAM application, which represents the mean NDVI
value over roughly the last 20 years (the metadata header reports `MEAN,2001-2021`). It is therefore of little direct use for our dataset, which covers only the last decade.

Given that, the only mean-derived value that matters is the anomaly value - that is, the SAMPLE VALUE minus the
MEAN VALUE for a given period.

In [6]:
def fn(df):
    """
    Aggregate a group of 8-day rows into one set of summary statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to combine. Must provide the columns "SAMPLE VALUE",
        "SAMPLE COUNT", "MEAN VALUE", "MEAN COUNT", "MIN VALUE" and
        "MAX VALUE".

    Returns
    -------
    pandas.Series
        "SAMPLE VALUE": SAMPLE VALUE averaged with SAMPLE COUNT as weights.
        "SAMPLE COUNT": sum of SAMPLE COUNT.
        "ANOM VALUE": the weighted SAMPLE VALUE minus MEAN VALUE averaged
        with MEAN COUNT as weights.
        "MIN VALUE": smallest MIN VALUE.
        "MAX VALUE": largest MAX VALUE.
    """
    sample_weights = df['SAMPLE COUNT']
    mean_weights = df['MEAN COUNT']
    total_samples = sample_weights.sum()

    # Count-weighted averages, so periods with more samples contribute more.
    weighted_sample = df['SAMPLE VALUE'].mul(sample_weights).sum() / total_samples
    weighted_mean = df['MEAN VALUE'].mul(mean_weights).sum() / mean_weights.sum()

    summary = {}
    summary["SAMPLE VALUE"] = weighted_sample
    summary["SAMPLE COUNT"] = total_samples
    summary["ANOM VALUE"] = weighted_sample - weighted_mean
    summary["MIN VALUE"] = df['MIN VALUE'].min()
    summary["MAX VALUE"] = df['MAX VALUE'].max()
    return pd.Series(summary)

def parse_df(data):
    """
    Collapse 8-day NDVI rows into one wide row per year.

    Steps:
      1. Keep only the rows whose START DATE and END DATE both fall in the
         corn growing season (April-November).
      2. Aggregate the remaining rows by (year, month of START DATE) using
         ``fn``.
      3. Pivot so that each year becomes a single row with one column per
         (statistic, month) pair.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with string columns "START DATE" and "END DATE" laid out as
        ``YYYY-MM-DD`` (year in characters 0-3, month in characters 5-6),
        plus the columns read by ``fn``: "SAMPLE VALUE", "SAMPLE COUNT",
        "MEAN VALUE", "MEAN COUNT", "MIN VALUE" and "MAX VALUE".
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per year, sorted by year, with a "year" column followed by
        ``sample_val_<Month>``, ``sample_count_<Month>``,
        ``anom_val_<Month>``, ``min_val_<Month>`` and ``max_val_<Month>``
        for every month from April to November. A month with no rows in a
        given year is NaN in all of its columns.
    """
    # Month number (as it appears in the date strings) -> column-name suffix.
    # Insertion order is chronological and fixes the output column order.
    season_months = {
        "04": "April",
        "05": "May",
        "06": "June",
        "07": "July",
        "08": "August",
        "09": "September",
        "10": "October",
        "11": "November",
    }
    # Statistic produced by ``fn`` -> output column prefix.
    stat_prefixes = {
        "SAMPLE VALUE": "sample_val",
        "SAMPLE COUNT": "sample_count",
        "ANOM VALUE": "anom_val",
        "MIN VALUE": "min_val",
        "MAX VALUE": "max_val",
    }
    # Only these columns are handed to ``fn``; the grouping keys stay out.
    value_cols = [
        "SAMPLE VALUE", "SAMPLE COUNT", "MEAN VALUE",
        "MEAN COUNT", "MIN VALUE", "MAX VALUE",
    ]
    month_keys = list(season_months)

    # Keep only the 8-day periods that start AND end inside the season.
    starts_in_season = data["START DATE"].str[5:7].isin(month_keys)
    ends_in_season = data["END DATE"].str[5:7].isin(month_keys)
    # Copy so the new key columns are added to an independent frame rather
    # than to a slice of the caller's data.
    season = data.loc[starts_in_season & ends_in_season].copy()
    season["year"] = season["START DATE"].str[:4]
    season["month"] = season["START DATE"].str[5:7]

    # One row per (year, month); columns are the statistics returned by fn.
    monthly = season.groupby(["year", "month"])[value_cols].apply(fn)

    # Pivot months into columns, then force the full statistic x month grid
    # so that every year gets all eight months, in chronological order.
    wide = monthly.unstack("month")
    wide = wide.reindex(
        columns=pd.MultiIndex.from_product([list(stat_prefixes), month_keys])
    )
    wide.columns = [
        f"{stat_prefixes[stat]}_{season_months[month]}"
        for stat, month in wide.columns
    ]

    return wide.reset_index()
    

Now that we've settled on how to transform the data, let's request the state-level NDVI data for each
Corn Belt state and concatenate the parsed dataframes into one large dataset.

In [7]:
# Map each state to its ID in GLAM.
# (IDs were collected manually - TODO: find where GLAM IDs are stored.)
idMapping = {
    "Iowa": "26246",
    "Minnesota": "26251",
    "South Dakota": "26237",
    "Nebraska": "26228",
    "Wisconsin": "26264",
    "Illinois": "26244",
    "Missouri": "26253",
    "Indiana": "26245",
    "Ohio": "26258",
    "Kansas": "26226"
}

# Both dicts are keyed by state name.
idToMetadata = dict()
idToDf = dict()
for state, glam_id in idMapping.items():
    # Shared parameters plus the ID of the state being requested.
    query = "&".join([f"{key}={val}" for key, val in params.items()] + [f"ids={glam_id}"])

    # A timeout keeps the loop from hanging forever on an unresponsive server.
    response = requests.get(f"{URL}?{query}", timeout=120)
    # Stop on an HTTP error instead of trying to parse an error body as CSV.
    response.raise_for_status()

    # The first 14 lines are the metadata header; the CSV table follows.
    with io.StringIO(response.content.decode("UTF-8")) as f:
        metadata = [next(f) for _ in range(14)]
        df = pd.read_csv(f)

    idToDf[state] = parse_df(df)
    idToMetadata[state] = metadata
    print(f"Finished parsing {state}.")


Finished parsing Iowa.
Finished parsing Minnesota.
Finished parsing South Dakota.
Finished parsing Nebraska.
Finished parsing Wisconsin.
Finished parsing Illinois.
Finished parsing Missouri.
Finished parsing Indiana.
Finished parsing Ohio.
Finished parsing Kansas.


In [19]:
# Label every row of each parsed frame with the state it belongs to.
for state, state_frame in idToDf.items():
    state_frame['state'] = state

In [22]:
# Stack all per-state frames with a single concat call; growing a frame with
# concat inside a loop re-copies the accumulated rows on every iteration.
state_frames = list(idToDf.values())
# pd.concat raises on an empty list, so fall back to an empty frame.
df_ndvi = pd.concat(state_frames) if state_frames else pd.DataFrame()

In [28]:
# Create the output directory first: to_csv fails if it does not exist,
# which would break a run from a fresh checkout.
output_path = Path('data/ndvi_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_ndvi.to_csv(output_path, index=False)