In [47]:
import pandas as pd
import os
import requests
import time
import numpy as np
import geopandas as gpd
import json
from datetime import datetime as dt

In [48]:
# Root data folder. Set the NASA_DEVELOP_DATA_DIR environment variable to point
# the notebook at a different location; the default is the original local folder.
output_dir = os.environ.get(
    "NASA_DEVELOP_DATA_DIR",
    r"C:\Users\kelechi\OneDrive - Kansas State University\Desktop\Research Resources\Conferences\NASA_DEVELOP\Data",
)

# The NASS district shapefiles live in a sub-folder of the data root
shapefile_dir = os.path.join(output_dir, "NASS_Districts_Shapefiles")
georgia = gpd.read_file(os.path.join(shapefile_dir, "georgia_NASS_districts.shp"))
texas = gpd.read_file(os.path.join(shapefile_dir, "texas_NASS_districts.shp"))


In [49]:
# List the distinct NASS district codes present in the Texas shapefile
texas['NASS'].unique()

array([12, 21, 22, 60, 70], dtype=int64)

In [50]:
# List the distinct NASS district codes present in the Georgia shapefile
georgia['NASS'].unique()

array([70, 80, 90], dtype=int64)

In [51]:
# Prep for API call
# Set root URL for API requests
root_url = 'https://api.climateengine.org/'

# Authentication info for the API.
# The key is read from the CLIMATE_ENGINE_API_KEY environment variable so that it
# is never stored in the notebook or its outputs. You can request a key from
# 'https://support.climateengine.org/article/36-requesting-an-authorization-key-token'
api_key = os.environ.get('CLIMATE_ENGINE_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the CLIMATE_ENGINE_API_KEY environment variable to your Climate Engine API key."
    )
headers = {'Authorization': api_key}


## GridMet
The GridMet drought metadata is provided here: https://support.climateengine.org/article/45-gridmet-drought. 
The values are updated every 5 days (pentad). However, we can extract the 30-day SPI and take the value for the last day of each month, following an approach similar to the one recommended here: https://gmao.gsfc.nasa.gov/research/subseasonal/atlas/SPI-html/SPI-description.html


In [52]:
# Set endpoint
endpoint = 'timeseries/native/coordinates'

# One DataFrame per district, concatenated in a later cell
district_list = []

for i, row in texas.iterrows():
    # Closed ring around the district's bounding box.
    # NOTE(review): the request covers the bounding rectangle, not the district
    # polygon itself, so the area mean includes cells outside the district.
    # Assumes the shapefile coordinates are lon/lat degrees -- TODO confirm the CRS.
    xmin, ymin, xmax, ymax = row.geometry.bounds
    bbox = [
        [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax], [xmin, ymin]]
    ]
    print("Extracting district: ", row['NASS'])

    # Set up parameters dictionary for API call
    params = {
        'dataset': 'GRIDMET_DROUGHT',
        'variable': 'spi30d',  # 30 day spi
        'start_date': '2015-01-01',
        'end_date': '2024-12-31',
        'area_reducer': 'mean',
        # json.dumps yields plain numeric text regardless of the float type in bbox
        'coordinates': json.dumps(bbox),
    }

    # Send API request. TLS verification is left on (the request carries the API
    # key); if a custom CA is required, point REQUESTS_CA_BUNDLE at it instead of
    # disabling verification. The timeout keeps a stalled request from hanging the cell.
    r1 = requests.get(root_url + endpoint, params=params, headers=headers, timeout=300)

    # Fail loudly on an HTTP error instead of hitting an obscure KeyError below
    r1.raise_for_status()

    # Convert to json format
    response_1 = r1.json()

    # Select the time-series records from the first element of the response
    data = response_1[0]['Data']
    df1 = pd.DataFrame.from_dict(data)

    df1['NASS_district'] = row['NASS']  # district

    district_list.append(df1)


Extracting district:  12




Extracting district:  21




Extracting district:  22




Extracting district:  60




Extracting district:  70




In [53]:
# Explore the summary statistics of the raw data.
# Note: df1 is reassigned on every pass of the extraction loop, so this
# summarizes only the last district processed, not all districts.
df1.describe()

Unnamed: 0,spi30d,NASS_district
count,730.0,730.0
mean,-0.025683,70.0
std,0.826728,0.0
min,-2.0304,70.0
25%,-0.651625,70.0
50%,-0.06105,70.0
75%,0.6598,70.0
max,1.8642,70.0


In [54]:
# Stack the per-district frames returned by the API into one long table
df2 = pd.concat(district_list, axis=0, ignore_index=True)

# Convert the date column and create year and month columns
df2['Date'] = pd.to_datetime(df2['Date'])
df2['month'] = df2['Date'].dt.month
df2['year'] = df2['Date'].dt.year

# Order rows chronologically within each district so that .last() below picks
# the latest observation of each month regardless of the order in which the
# API returned the records.
df2 = df2.sort_values(['NASS_district', 'Date'], ignore_index=True)

# Monthly value = last available 30-day SPI in each month (not an average).
# The column is selected by name rather than by position so that a change in
# column order cannot silently pick the wrong series.
monthly_data = (
    df2.groupby(['NASS_district', 'year', 'month'])['spi30d']
    .last()
    .reset_index()
)
monthly_data

Unnamed: 0,NASS_district,year,month,spi30d
0,12,2015,1,1.5165
1,12,2015,2,0.3632
2,12,2015,3,0.3132
3,12,2015,4,0.8864
4,12,2015,5,1.8221
...,...,...,...,...
595,70,2024,8,-1.0874
596,70,2024,9,0.7333
597,70,2024,10,-2.0304
598,70,2024,11,0.3252


In [36]:
'''
This cell is optional. It adds a column to the data that specifies whether the month is dry, normal, or wet, based on the SPI values.
If you don't want to include this column, you can skip this cell and run the next one instead.
'''
# Create a function that will classify drought conditions
# Define a function for condition classification

def classify_condition(spi_value):
    """Map an SPI value to a moisture label.

    Returns 'Wet' when the value is at least 0.5, 'Dry' when it is at most
    -0.5, and 'Normal' for anything else. A NaN input fails both comparisons
    and therefore also yields 'Normal'.
    """
    if spi_value >= 0.5:
        return 'Wet'
    if spi_value <= -0.5:
        return 'Dry'
    return 'Normal'
    
# Label every monthly SPI value with classify_condition and store the result
# in a new 'Condition' column
spi_values = monthly_data['spi30d']
monthly_data['Condition'] = spi_values.map(classify_condition)

print(monthly_data)


     NASS_district  year  month    spi14d Condition
0               12  2015      1  0.875633       Wet
1               12  2015      2  0.159360    Normal
2               12  2015      3  0.506257       Wet
3               12  2015      4  0.437933    Normal
4               12  2015      5  1.406567       Wet
..             ...   ...    ...       ...       ...
595             70  2024      8 -0.442750    Normal
596             70  2024      9  0.107750    Normal
597             70  2024     10 -1.059486       Dry
598             70  2024     11  0.308700    Normal
599             70  2024     12 -0.379783    Normal

[600 rows x 5 columns]


In [55]:
# Make sure the GridMet output folder exists under the data root
data_dir = os.path.join(output_dir, "GridMet")
os.makedirs(data_dir, exist_ok=True)

# Write the monthly SPI table as a comma-separated file without the index
spi_csv_path = os.path.join(data_dir, 'spi30d_monthly_Texas.csv')
monthly_data.to_csv(spi_csv_path, sep=',', index=False)

In [46]:
# Show the current working directory of the running process
os.getcwd()

'C:\\Users\\kelechi\\OneDrive - Kansas State University\\Desktop\\Research Resources\\Conferences\\NASA_DEVELOP\\Scripts'

## CHIRPS

In [16]:
# Set endpoint
endpoint = 'timeseries/native/coordinates'

# One DataFrame of daily CHIRPS precipitation per district.
# NOTE(review): the name says "tmax" but the list holds precipitation; it is
# kept because the aggregation cell reads this variable.
tmax_list = []

for i, row in texas.iterrows():
    # Closed ring around the district's bounding box.
    # NOTE(review): the request covers the bounding rectangle, not the district
    # polygon itself, so the area mean includes cells outside the district.
    # Assumes the shapefile coordinates are lon/lat degrees -- TODO confirm the CRS.
    xmin, ymin, xmax, ymax = row.geometry.bounds
    bbox = [
        [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax], [xmin, ymin]]
    ]
    print("Extracting district: ", row['NASS'])

    # Set up parameters dictionary for API call
    params = {
        'dataset': 'CHIRPS_DAILY',
        'variable': 'precipitation',
        'start_date': '2015-01-01',
        'end_date': '2024-12-31',
        'area_reducer': 'mean',
        # json.dumps yields plain numeric text regardless of the float type in bbox
        'coordinates': json.dumps(bbox),
    }

    # Send API request. TLS verification is left on (the request carries the API
    # key); if a custom CA is required, point REQUESTS_CA_BUNDLE at it instead of
    # disabling verification. The timeout keeps a stalled request from hanging the cell.
    r1 = requests.get(root_url + endpoint, params=params, headers=headers, timeout=300)

    # Fail loudly on an HTTP error instead of hitting an obscure KeyError below
    r1.raise_for_status()

    # Convert to json format
    response_1 = r1.json()

    # Select the time-series records from the first element of the response
    data = response_1[0]['Data']
    df1 = pd.DataFrame.from_dict(data)

    df1['NASS_district'] = row['NASS']  # district

    tmax_list.append(df1)



Extracting district:  12




Extracting district:  21




Extracting district:  22




Extracting district:  60




Extracting district:  70




In [17]:
# Stack the per-district daily frames into one long table
df2 = pd.concat(tmax_list, axis=0, ignore_index=True)

# Parse the date strings, then derive calendar month and year for grouping
df2['Date'] = pd.to_datetime(df2['Date'])
df2 = df2.assign(month=df2['Date'].dt.month, year=df2['Date'].dt.year)

# Average the daily values within each district and calendar month.
# The value column is the second column of the frame ('precipitation (mm)'
# in the displayed result).
value_col = df2.columns[1]
monthly_data = (
    df2.groupby(['NASS_district', 'year', 'month'])[value_col]
    .mean()
    .reset_index()
)
monthly_data

Unnamed: 0,NASS_district,year,month,precipitation (mm)
0,12,2015,1,0.969165
1,12,2015,2,0.493471
2,12,2015,3,0.722787
3,12,2015,4,1.521773
4,12,2015,5,5.310310
...,...,...,...,...
595,70,2024,8,0.817155
596,70,2024,9,2.633623
597,70,2024,10,0.281039
598,70,2024,11,1.132883


In [18]:
# Create the CHIRPS output folder and save the monthly precipitation table.
# The file is written comma-separated so its content matches the .csv extension
# and the GridMet export; a tab-separated file under a .csv name is read as a
# single column by a default pd.read_csv().
data_dir = os.path.join(output_dir, "CHIRPS")
os.makedirs(data_dir, exist_ok=True)

monthly_data.to_csv(os.path.join(data_dir, 'CHIRPS_precip_Texas.csv'), sep=',', index=False)

In [10]:
# Show the current working directory of the running process
os.getcwd()

'C:\\Users\\kelechi\\OneDrive - Kansas State University\\Desktop\\Research Resources\\Conferences\\NASA_DEVELOP\\Scripts'