# Import libraries

In [29]:
import numpy as np
import pandas as pd
import geopandas as gpd
import requests

# Set feature service variables

In [30]:
# URL of the feature-service layer that holds the zonal statistics
feat_srvc = 'https://services3.arcgis.com/0Fs3HcaFfvzXvm7w/ArcGIS/rest/services/CRIS_Zonal_Statistics_by_County/FeatureServer/3'

# Path of the query operation; it is appended to feat_srvc when rows are requested
query = '/query'

# Name of the service: the third component from the end of the layer URL
feat_name = feat_srvc.rsplit('/', 3)[-3]

# Field that uniquely identifies a county and is used to pick the counties
# to calculate zonal statistics over.
# In this case, use Geographic identifiers (Field Name: GEOID).
uniqueID = 'GEOID'

# Pick counties and years to return

In [None]:
# List the GEOID of every county to process, each between apostrophes and
# separated by commas, e.g. ids = '20201', '20117'
# Use ids = 'all' to process every county in the feature layer
ids = (
    '20201',
    '20203',
    '20117',
)

In [32]:
# First and last year of the period to process (both are included in the query).
# To process a single year, use that year for both the start and the end.
year_start, year_end = 1960, 1970

In [33]:
# Define the where clause for counties and years to return

# The year filter is part of every query
where_clause_years = f"YEAR >= {year_start} AND YEAR <= {year_end}"

if isinstance(ids, str) and ids == 'all':

    # No county filter: every county in the layer is returned
    where_clause_id = where_clause_years

else:

    # Accept a single GEOID as well as a tuple or list of GEOIDs of any length,
    # including a collection that holds just one GEOID
    id_list = [ids] if isinstance(ids, (str, int)) else list(ids)

    if not id_list:
        raise ValueError("ids is empty: list at least one GEOID or set ids = 'all'")

    # Build the SQL list explicitly rather than relying on how Python prints a tuple
    quoted_ids = ', '.join(f"'{geoid}'" for geoid in id_list)

    where_clause_id = f"{uniqueID} IN ({quoted_ids}) AND {where_clause_years}"

# Pick variables to return

### Pick one of the three options below, run the appropriate cells, then move to "Retrieve data"

##### Option 1: return all variables

In [23]:
# Run this cell if you want all variables returned
# ('*' is passed on as the field list of the data request in "Retrieve data")
variables = '*'

##### Option 2: return manually selected subset of variables

In [None]:
# Show available variables

# Request the layer description, which contains the list of fields
response_temp = requests.get(feat_srvc + '?f=pjson')

# Stop with a clear HTTP error instead of failing later on a missing key
response_temp.raise_for_status()
data = response_temp.json()

# ArcGIS REST services can report a failure inside the JSON body under an 'error' key
if 'error' in data:
    raise RuntimeError(f"Feature service returned an error: {data['error']}")

# Print the name and description (alias) of every field in the layer
fields = data['fields']
for field in fields:
    print('Variable:', field['name'], '\nDescription:', field['alias'], '\n')
    print()

In [418]:
# Select one or more variables from the list printed above.
# Put each variable between apostrophes and separate by commas, e.g. variables = 'TMAX_NCLIMGRID_MIN'
variables = (
    'TMAX_NCLIMGRID_MIN',
    'TMAX_NCLIMGRID_MAX',
    'TMAX_NCLIMGRID_MEAN',
)

##### Option 3: select all NCLIMGRID or all LIVNEH variables

In [35]:
# Request the layer's field list in this cell, so that this option also works
# when the "Show available variables" cell of Option 2 has not been run
response_fields = requests.get(feat_srvc + '?f=pjson')
response_fields.raise_for_status()
fields = response_fields.json()['fields']

# Run the line below for all NCLIMGRID variables
variables = [field['name'] for field in fields if "NCLIMGRID" in field['name']]

# Run the line below for all LIVNEH variables
# variables = [field['name'] for field in fields if "LIVNEH" in field['name']]

# Retrieve data

In [36]:
# Add counties ('GEOID') and 'YEAR' to the variables-to-return list
return_variables = ['GEOID', 'YEAR']

# A bare string is one variable name (or '*'); anything else is a collection of names.
# Checking the type rather than the size keeps a one-element list or tuple from being
# appended as a nested list.
requested_variables = [variables] if isinstance(variables, str) else list(variables)

# Skip names that are already listed so that no field is requested twice
for var in requested_variables:

    if var not in return_variables:

        return_variables.append(var)

In [37]:
# Maximum number of variables that can be processed in each request: 50
# The number of rows returned per request is limited by the feature service
# This code loops through variables and rows and combines each subset into a final table (gdf)

# Number of variables to process in each request
loop_size = 50


def _fetch_rows(out_fields):
    """Download every row that matches where_clause_id for the given fields.

    Rows are requested page by page with resultOffset until the service
    returns a page without features, so the loop does not rely on a fixed
    number of rows per page. The pages are stacked into one table and the
    geometry column is removed.
    """
    # '*' already stands for every field, so it is sent on its own;
    # otherwise the field names are sent as one comma-separated list
    out_fields_param = '*' if '*' in out_fields else ','.join(out_fields)

    pages = []

    # Number of rows to skip from the start of the result
    offset = 0

    while True:

        # Set parameters for requests.get()
        # The fixed sort order keeps the rows of every request in the same order
        params = {
            'where': where_clause_id,
            'outFields': out_fields_param,
            'orderByFields': 'YEAR,GEOID',
            'f': 'pgeojson',
            'resultOffset': offset,
        }

        # Request response from feature service
        response = requests.get(feat_srvc + query, params=params)
        response.raise_for_status()
        payload = response.json()

        # ArcGIS REST services can report a failure inside the JSON body under an 'error' key
        if 'error' in payload:
            raise RuntimeError(f"Feature service returned an error: {payload['error']}")

        # A page without features means that all rows have been retrieved
        if not payload.get('features'):
            break

        # Translate the response text into a temporary GeoDataFrame
        page = gpd.read_file(response.text)

        # Drop geometry field (tolerate a page that has no geometry column)
        pages.append(page.drop(columns=['geometry'], errors='ignore'))

        # The next page starts after the rows received so far
        offset += len(page)

    if not pages:
        raise ValueError("The query returned no rows; check the selected ids and years")

    return pd.concat(pages, ignore_index=True)


# Loop through variables, loop_size at one time, and retrieve all rows for each subset
variable_chunks = [
    _fetch_rows(return_variables[start:start + loop_size])
    for start in range(0, len(return_variables), loop_size)
]

# The subsets are joined side by side by row position, so they must all have the same rows
if len({len(chunk) for chunk in variable_chunks}) > 1:
    raise RuntimeError("The variable subsets returned different numbers of rows")

# Combine the variable subsets into the final table (gdf)
gdf = pd.concat(variable_chunks, axis=1)

In [38]:
# Show the retrieved table
gdf

Unnamed: 0,GEOID,YEAR,TMAX_NCLIMGRID_MIN,TMAX_NCLIMGRID_MEAN,TMAX_NCLIMGRID_MAX,TAVG_NCLIMGRID_MIN,TAVG_NCLIMGRID_MEAN,TAVG_NCLIMGRID_MAX,TMIN_NCLIMGRID_MIN,TMIN_NCLIMGRID_MEAN,...,PRABVNZ99TH_NCLIMGRID_MAX,PRDAYSABVNZ90TH_NCLIMGRID_MIN,PRDAYSABVNZ90TH_NCLIMGRID_MEAN,PRDAYSABVNZ90TH_NCLIMGRID_MAX,PRDAYSABVNZ95TH_NCLIMGRID_MIN,PRDAYSABVNZ95TH_NCLIMGRID_MEAN,PRDAYSABVNZ95TH_NCLIMGRID_MAX,PRDAYSABVNZ99TH_NCLIMGRID_MIN,PRDAYSABVNZ99TH_NCLIMGRID_MEAN,PRDAYSABVNZ99TH_NCLIMGRID_MAX
0,20117,1960,60.59855,61.65095,62.72018,49.78303,50.45352,51.09841,38.73701,39.25613,...,0.091245,4,6.638462,9,2,2.692308,5,0,0.061538,1
1,20201,1960,59.99556,61.21878,62.20083,49.24397,50.18954,50.93623,38.33039,39.16031,...,0.060175,6,7.664286,10,2,3.114286,4,0,0.042857,1
2,20203,1960,64.15045,64.99002,65.55435,50.80145,51.36655,51.76242,37.37737,37.74318,...,0.0,4,6.479339,9,1,2.123967,4,0,0.0,0
3,20117,1961,61.16008,62.32478,63.22834,50.04365,50.78687,51.39643,38.61145,39.24902,...,0.688666,12,14.60769,16,7,8.0,11,1,1.492308,4
4,20201,1961,61.34624,62.344,63.26941,50.08638,50.86523,51.40093,38.58541,39.38648,...,2.661058,10,12.92857,15,7,8.164286,10,1,2.371428,4
5,20203,1961,64.31181,64.99916,65.45311,50.79366,51.25188,51.60447,37.1624,37.50465,...,0.406671,4,9.305785,12,1,3.198347,5,0,0.586777,1
6,20117,1962,61.92708,63.26773,64.31146,51.28812,52.07831,52.77944,40.3418,40.88895,...,2.895071,7,8.730769,11,4,4.507692,6,1,1.315385,3
7,20201,1962,62.06536,63.35727,64.40408,51.15702,52.13924,52.82926,40.08089,40.92117,...,2.884059,5,7.321429,10,2,3.785714,5,0,1.128571,2
8,20203,1962,66.85512,67.33525,67.7943,52.53965,53.05908,53.56401,38.15741,38.78296,...,0.771259,4,6.057851,8,2,3.38843,6,0,0.53719,1
9,20117,1963,64.17758,65.57433,66.73495,52.84132,53.72041,54.48975,41.16528,41.86652,...,0.0,2,4.923077,6,0,2.246154,4,0,0.0,0


# Analysis examples

### Calculation 1: mean over time period

In [39]:
# Optional: keep only the years year_start_subset through year_end_subset (both included).
# Note that this replaces gdf, so the years outside the subset are no longer in it afterwards.
year_start_subset = 1962
year_end_subset = 1968

in_subset_period = gdf['YEAR'].ge(year_start_subset) & gdf['YEAR'].lt(year_end_subset + 1)
gdf = gdf[in_subset_period]

##### Run this cell to return the mean of each variable in the GeoDataFrame

In [40]:
# Group by county (GEOID) and calculate the mean of every numeric variable.
# numeric_only=True leaves out text fields, which cannot be averaged.
variables_mean = gdf.groupby('GEOID').mean(numeric_only=True)

# Drop YEAR column (the mean of the years is not a meaningful variable)
variables_mean = variables_mean.drop(columns='YEAR')

# Rename columns to reflect that they represent the mean of each variable over time.
# GEOID is still the index at this point, so it keeps its name wherever it sits in gdf.
variables_mean = variables_mean.add_suffix(' (mean)')

# Add index numbers to rows; GEOID becomes a regular column again
variables_mean = variables_mean.reset_index()

# Show means of variables
variables_mean

Unnamed: 0,GEOID,TMAX_NCLIMGRID_MIN (mean),TMAX_NCLIMGRID_MEAN (mean),TMAX_NCLIMGRID_MAX (mean),TAVG_NCLIMGRID_MIN (mean),TAVG_NCLIMGRID_MEAN (mean),TAVG_NCLIMGRID_MAX (mean),TMIN_NCLIMGRID_MIN (mean),TMIN_NCLIMGRID_MEAN (mean),TMIN_NCLIMGRID_MAX (mean),...,PRABVNZ99TH_NCLIMGRID_MAX (mean),PRDAYSABVNZ90TH_NCLIMGRID_MIN (mean),PRDAYSABVNZ90TH_NCLIMGRID_MEAN (mean),PRDAYSABVNZ90TH_NCLIMGRID_MAX (mean),PRDAYSABVNZ95TH_NCLIMGRID_MIN (mean),PRDAYSABVNZ95TH_NCLIMGRID_MEAN (mean),PRDAYSABVNZ95TH_NCLIMGRID_MAX (mean),PRDAYSABVNZ99TH_NCLIMGRID_MIN (mean),PRDAYSABVNZ99TH_NCLIMGRID_MEAN (mean),PRDAYSABVNZ99TH_NCLIMGRID_MAX (mean)
0,20117,63.011027,64.28175,65.290319,51.620457,52.359241,53.018111,39.85573,40.436724,41.337421,...,1.304291,5.142857,7.838462,10.285714,2.285714,3.92967,6.0,0.571429,0.867033,1.714286
1,20201,63.135021,64.431346,65.491894,51.48888,52.477181,53.159274,39.68999,40.523016,41.242467,...,1.318225,4.857143,7.606122,11.142857,1.285714,3.252041,5.571429,0.0,0.619388,1.714286
2,20203,66.566227,67.193259,67.756231,52.276163,52.767074,53.261786,37.867054,38.34093,38.839097,...,0.875314,4.285714,6.321134,8.428571,2.0,3.347108,5.0,0.285714,0.475797,1.0


### Calculation 2: change over time

In [41]:
# First and last year of the change calculation.
# Both years must be present in gdf.
year_start_change = 1962
year_end_change = 1968

# Split off one table with the rows of the start year and one with the rows of the end year
year_column = gdf['YEAR']
gdf_start_change = gdf.loc[year_column == year_start_change]
gdf_end_change = gdf.loc[year_column == year_end_change]

In [42]:
# Index both tables by county (GEOID), so that calculations between them match rows per county
gdf_start_change = gdf_start_change.set_index('GEOID')
gdf_end_change = gdf_end_change.set_index('GEOID')

In [43]:
# Absolute difference per county: end year minus start year (rows are matched on GEOID).
# The YEAR column is removed and GEOID is turned back into a regular column.
variables_difference_absolute = (
    gdf_end_change.sub(gdf_start_change)
    .drop(columns='YEAR')
    .reset_index()
)

# Show absolute difference of each county and each variable between the start and end years
variables_difference_absolute

Unnamed: 0,GEOID,TMAX_NCLIMGRID_MIN,TMAX_NCLIMGRID_MEAN,TMAX_NCLIMGRID_MAX,TAVG_NCLIMGRID_MIN,TAVG_NCLIMGRID_MEAN,TAVG_NCLIMGRID_MAX,TMIN_NCLIMGRID_MIN,TMIN_NCLIMGRID_MEAN,TMIN_NCLIMGRID_MAX,...,PRABVNZ99TH_NCLIMGRID_MAX,PRDAYSABVNZ90TH_NCLIMGRID_MIN,PRDAYSABVNZ90TH_NCLIMGRID_MEAN,PRDAYSABVNZ90TH_NCLIMGRID_MAX,PRDAYSABVNZ95TH_NCLIMGRID_MIN,PRDAYSABVNZ95TH_NCLIMGRID_MEAN,PRDAYSABVNZ95TH_NCLIMGRID_MAX,PRDAYSABVNZ99TH_NCLIMGRID_MIN,PRDAYSABVNZ99TH_NCLIMGRID_MEAN,PRDAYSABVNZ99TH_NCLIMGRID_MAX
0,20117,0.50425,0.28911,0.15501,-0.31177,-0.38028,-0.50888,-1.0414,-1.04976,-1.17355,...,-0.191143,-3,-1.307692,-1,-2,-1.476923,-1,1,0.684615,-1
1,20201,0.44423,0.30986,0.1873,-0.23938,-0.33845,-0.44498,-0.86275,-0.98675,-1.16392,...,-0.231653,1,1.964285,4,0,-0.55,1,0,0.328572,0
2,20203,-0.85861,-0.74486,-0.65822,-0.99731,-1.06961,-1.09719,-1.20495,-1.39442,-1.45779,...,-0.771259,-2,-2.487603,-2,-2,-2.0,-4,0,-0.53719,-1


In [44]:
# Calculate percentage difference between start and end years, relative to the start year.
# A start value of 0 gives NaN (when the end value is also 0) or an infinite value.
variables_difference_percentage = ((gdf_end_change - gdf_start_change) / gdf_start_change) * 100

# Drop YEAR column
variables_difference_percentage = variables_difference_percentage.drop(columns='YEAR')

# Rename columns to reflect that they represent the percentage difference between the start and end years.
# GEOID is the index at this point, so it keeps its name wherever it sits in gdf.
variables_difference_percentage = variables_difference_percentage.add_suffix(' (%)')

# Add index numbers to rows; GEOID becomes a regular column again
variables_difference_percentage = variables_difference_percentage.reset_index()

# Show percentage difference of each county and each variable between the start and end years
variables_difference_percentage

Unnamed: 0,GEOID,TMAX_NCLIMGRID_MIN (%),TMAX_NCLIMGRID_MEAN (%),TMAX_NCLIMGRID_MAX (%),TAVG_NCLIMGRID_MIN (%),TAVG_NCLIMGRID_MEAN (%),TAVG_NCLIMGRID_MAX (%),TMIN_NCLIMGRID_MIN (%),TMIN_NCLIMGRID_MEAN (%),TMIN_NCLIMGRID_MAX (%),...,PRABVNZ99TH_NCLIMGRID_MAX (%),PRDAYSABVNZ90TH_NCLIMGRID_MIN (%),PRDAYSABVNZ90TH_NCLIMGRID_MEAN (%),PRDAYSABVNZ90TH_NCLIMGRID_MAX (%),PRDAYSABVNZ95TH_NCLIMGRID_MIN (%),PRDAYSABVNZ95TH_NCLIMGRID_MEAN (%),PRDAYSABVNZ95TH_NCLIMGRID_MAX (%),PRDAYSABVNZ99TH_NCLIMGRID_MIN (%),PRDAYSABVNZ99TH_NCLIMGRID_MEAN (%),PRDAYSABVNZ99TH_NCLIMGRID_MAX (%)
0,20117,0.814264,0.456963,0.24103,-0.60788,-0.730208,-0.964163,-2.581442,-2.567344,-2.805942,...,-6.60236,-42.857143,-14.97797,-9.090909,-50.0,-32.764506,-16.666667,100.0,52.046739,-33.333333
1,20201,0.715745,0.489068,0.29082,-0.467932,-0.649127,-0.842298,-2.152522,-2.411344,-2.789973,...,-8.032187,20.0,26.829257,40.0,0.0,-14.528303,20.0,,29.113986,0.0
2,20203,-1.284285,-1.106196,-0.970908,-1.898204,-2.015885,-2.048372,-3.15784,-3.595445,-3.702202,...,-100.0,-50.0,-41.064117,-25.0,-100.0,-59.024386,-66.666667,,-100.0,-100.0


### Calculation 3: bivariate comparison of temperature and precipitation

In [None]:
# As example: process one temperature variable and one precipitation variable to make a bivariate map.
# Check if there are statistically comparable variables to use, e.g. 90th percentile.
# hot and dry days?
# TMAXDAYSGE90F_NCLIMGRID_MEAN - hot - Annual number of days with a maximum temperature greater than or equal to 90°F - nClimGrid - Mean
# PRDAYSGE1IN_NCLIMGRID_MEAN - dry - Annual number of days with total precipitation greater than or equal to 1 inch - nClimGrid - Mean (note: this counts wet days, so low values indicate dry conditions)

In [None]:
# grab geometries from featurelayer/0
# link geometries to GEOIDs
# grab the two variables from gdf, for 1 year
# plot bivariate map of the two variables, for each county, for 1 year