In [3]:
#!pip install nass

In [1]:
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Widen the notebook container so wide outputs use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

"""
download_yield_data.py

Gets Sorghum data for the statistic category set in `statistic` (currently
'PROGRESS') from NASS via the public API and saves it in raw form to the CSV
file given by `outpath` in `main()`.

Documentation for the `nass` package, which provides the interface to the
NASS API, can be found here:
https://nass.readthedocs.io/en/latest/api.html#

Tyler Huntington, 2018
"""

import json
import os
import sys
from pprint import pprint

import pandas as pd
from nass import NassApi

# Module-level configuration (plain assignments; a `global` statement at module
# level has no effect, so none is used here).

# Statistic category to download data for from NASS.
# Possible values include: 'YIELD', 'PRODUCTION', 'AREA PLANTED', 'AREA HARVESTED' etc...
statistic = 'PROGRESS'

# NASS API key. It is read from the NASS_API_KEY environment variable so that the
# credential is not stored in the notebook. Set it before starting the kernel,
# e.g. `set NASS_API_KEY=<your key>` (Windows) or `export NASS_API_KEY=<your key>`.
# The value is None when the variable is not set.
# NOTE(review): a key was previously committed in this cell; treat it as exposed
# and request a new one.
API_KEY = os.environ.get('NASS_API_KEY')

In [2]:
def _load_counties(counties_file):
    """Read the counties CSV and normalise its column names, county names and county codes."""
    counties_df = pd.read_csv(counties_file)

    # rename columns (index=str also converts the row labels to strings)
    counties_df = counties_df.rename(
        index=str,
        columns={
            'STATEABBREV': 'STATE_ABBREV',
            'NAME': 'COUNTY_NAME',
            'COUNTYFP': 'COUNTY_FP',
        },
    )

    # convert county names to uppercase
    counties_df['COUNTY_NAME'] = counties_df['COUNTY_NAME'].apply(to_upper)
    # convert county FP codes to zero-padded 3-character strings
    counties_df['COUNTY_FP'] = counties_df['COUNTY_FP'].apply(zfill_int_to_str,
                                                              args=(3,))
    return counties_df


def _fetch_yearly_frames(api, years, row_limit=5000):
    """Run one state-level SORGHUM query per year and return (frames, skipped_years).

    `frames` holds one DataFrame per year that returned between 1 and
    `row_limit` rows; `skipped_years` lists the years whose row count was above
    `row_limit` and which were therefore not downloaded.
    """
    # NOTE(review): 5000 is the threshold used by the original code; the NASS
    # Quick Stats documentation describes a 50,000-record cap -- confirm.
    frames = []
    skipped_years = []

    for y in years:
        print("Starting new query for year: {} ".format(y))
        q = api.query()
        q = q.filter('commodity_desc', 'SORGHUM').filter('year', y)
        q = q.filter('statisticcat_desc', statistic)
        q = q.filter('agg_level_desc', 'STATE')
        rows = q.count()

        if rows == 0:
            print("No rows resulted from query!")
        elif rows > row_limit:
            print("Query limit exceeded!")
            skipped_years.append(y)
        elif rows > 0:
            print("Rows: " + str(rows))
            frames.append(pd.DataFrame(q.execute()))

    return frames, skipped_years


def main(counties_file=r'C:\Users\uqjojeda\Nextcloud\PURTERRA-A0131\2020\USDA\counties.csv',
         outpath=r'C:\Users\uqjojeda\Nextcloud\PURTERRA-A0131\2020\USDA\PlantedDateCountySorghum.csv'):
    """Download state-level SORGHUM records for `statistic` from NASS and save them as CSV.

    One query is issued for every year the API reports. Years with no rows, or
    with more rows than the query limit, contribute nothing to the output.

    Parameters
    ----------
    counties_file : str
        Path of the counties CSV to load. Defaults to the original hardcoded path.
    outpath : str
        Path of the CSV file to write. Defaults to the original hardcoded path.
        NOTE(review): the default file name mentions "County" but the query
        filters on agg_level_desc == 'STATE' -- confirm which is intended.

    Raises
    ------
    RuntimeError
        If the module-level `API_KEY` is empty or None.
    """
    if not API_KEY:
        raise RuntimeError("API_KEY is not set; a NASS API key is required to "
                           "query the NASS API.")

    # NOTE(review): the counties table is loaded and cleaned as in the original
    # code, but nothing below uses it.
    _load_counties(counties_file)

    api = NassApi(API_KEY)
    years = api.param_values('year')

    frames, skipped_years = _fetch_yearly_frames(api, years)

    # Concatenate once instead of growing the frame inside the loop, which
    # re-copies all accumulated rows on every iteration.
    accum_df = pd.concat(frames) if frames else pd.DataFrame()
    accum_df.to_csv(outpath, index=False)

    # Make skipped years visible at the end of the (long) per-year log.
    if skipped_years:
        print("WARNING: no data was saved for years exceeding the query "
              "limit: {}".format(skipped_years))

# Helper functions
def to_upper(s):
    """Return the result of calling ``upper()`` on `s`."""
    uppercased = s.upper()
    return uppercased

def zfill_int_to_str(i, width):
    """Convert `i` to a string and left-pad it with zeros to at least `width` characters."""
    text = str(i)
    padded = text.zfill(width)
    return padded


# Entry point: run the download when this code is executed directly.
if __name__ == "__main__":
    main()

Starting new query for year: 1850 
No rows resulted from query!
Starting new query for year: 1860 
No rows resulted from query!
Starting new query for year: 1866 
No rows resulted from query!
Starting new query for year: 1867 
No rows resulted from query!
Starting new query for year: 1868 
No rows resulted from query!
Starting new query for year: 1869 
No rows resulted from query!
Starting new query for year: 1870 
No rows resulted from query!
Starting new query for year: 1871 
No rows resulted from query!
Starting new query for year: 1872 
No rows resulted from query!
Starting new query for year: 1873 
No rows resulted from query!
Starting new query for year: 1874 
No rows resulted from query!
Starting new query for year: 1875 
No rows resulted from query!
Starting new query for year: 1876 
No rows resulted from query!
Starting new query for year: 1877 
No rows resulted from query!
Starting new query for year: 1878 
No rows resulted from query!
Starting new query for year: 1879 
No ro

Rows: 470
Starting new query for year: 1997 
Rows: 551
Starting new query for year: 1998 
Rows: 551
Starting new query for year: 1999 
Rows: 651
Starting new query for year: 2000 
Rows: 528
Starting new query for year: 2001 
Rows: 550
Starting new query for year: 2002 
Rows: 660
Starting new query for year: 2003 
Rows: 759
Starting new query for year: 2004 
Rows: 757
Starting new query for year: 2005 
Rows: 715
Starting new query for year: 2006 
Rows: 741
Starting new query for year: 2007 
Rows: 693
Starting new query for year: 2008 
Rows: 803
Starting new query for year: 2009 
Rows: 667
Starting new query for year: 2010 
Rows: 595
Starting new query for year: 2011 
Rows: 661
Starting new query for year: 2012 
Rows: 770
Starting new query for year: 2013 
Rows: 627
Starting new query for year: 2014 
Rows: 1024
Starting new query for year: 2015 
Rows: 1003
Starting new query for year: 2016 
Rows: 975
Starting new query for year: 2017 
Rows: 1018
Starting new query for year: 2018 
Rows: 9