In [110]:
import io
import os
import urllib.parse

import pandas as pd
import requests

In [111]:
# The Quick Stats API key is read from the NASS_API_KEY environment variable so
# the credential never lives in the notebook or its version history. Any key
# that was previously committed in plain text should be treated as leaked and
# rotated.
API_KEY = os.environ.get("NASS_API_KEY", "")
if not API_KEY:
    # Warn right away so a missing key is noticed here rather than as a
    # confusing failure in the request cell further down.
    print("WARNING: NASS_API_KEY is not set; Quick Stats requests need a valid API key.")


In [112]:
# API Documentation at https://quickstats.nass.usda.gov/api/

# Filters for the Quick Stats query: county-level SURVEY figures for corn grain
# production (measured in bushels), for years >= 2013 (the "__GE" suffix is the
# API's "greater than or equal" operator).
params = {
    "source_desc": "SURVEY",
    "sector_desc": "CROPS",
    "group_desc": "FIELD CROPS",
    "commodity_desc": "CORN",
    "statisticcat_desc": "PRODUCTION",
    "short_desc": "CORN, GRAIN - PRODUCTION, MEASURED IN BU",
    "domain_desc": "TOTAL",
    "agg_level_desc": "COUNTY",
    "year__GE": "2013"
}

# Percent-encode every key and value. Several values contain spaces and commas,
# and a value containing "&" or "=" would otherwise corrupt the query string.
# quote_via=quote encodes spaces as %20 rather than "+".
query = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)

URL = f"https://quickstats.nass.usda.gov/api/api_GET/?key={API_KEY}&{query}&format=CSV"
headers = {'User-Agent': ''} # NEEDED BECAUSE USDA BLOCKS 'non-browser' traffic


In [113]:
# Download the CSV export, reusing the `headers` dict defined next to URL.
# The timeout stops the cell from hanging indefinitely if the service does not
# answer, and raise_for_status() surfaces HTTP errors here instead of letting an
# error body reach the CSV parser in the next cell.
response = requests.get(URL, headers=headers, timeout=120)
response.raise_for_status()

In [None]:
# Decode the raw response bytes as UTF-8 and parse the CSV payload from an
# in-memory text buffer; thousands="," lets pandas read comma-grouped numbers
# as numeric values.
csv_buffer = io.StringIO(response.content.decode("UTF-8"))
df = pd.read_csv(csv_buffer, thousands=",")

In [None]:
# Report the number of rows, then the column index, each on its own line.
print(len(df), df.columns, sep="\n")

14524
Index(['source_desc', 'sector_desc', 'group_desc', 'commodity_desc',
       'class_desc', 'prodn_practice_desc', 'util_practice_desc',
       'statisticcat_desc', 'unit_desc', 'short_desc', 'domain_desc',
       'domaincat_desc', 'agg_level_desc', 'state_ansi', 'state_fips_code',
       'state_alpha', 'state_name', 'asd_code', 'asd_desc', 'county_ansi',
       'county_code', 'county_name', 'region_desc', 'zip_5', 'watershed_code',
       'watershed_desc', 'congr_district_code', 'country_code', 'country_name',
       'location_desc', 'year', 'freq_desc', 'begin_code', 'end_code',
       'reference_period_desc', 'week_ending', 'load_time', 'Value', 'CV (%)'],
      dtype='object')


That's a lot of columns. Let's keep the ones we care about.

In [None]:
# Restrict the frame to the year, the location (state and county) and the
# reported value; every other column is dropped.
columnsToKeep = ["year", "state_name", "county_name", "Value"]
df = df.loc[:, columnsToKeep]

In [None]:
# Preview the first five rows of the trimmed frame.
df.head(n=5)

Unnamed: 0,year,state_name,Value
0,2019,ALABAMA,920000
1,2018,ALABAMA,322000
2,2017,ALABAMA,254000
3,2016,ALABAMA,192000
4,2015,ALABAMA,990000


Now that we have the corn production data by county for all of the states,
let's prune it further to the counties in the states that we want.

In [None]:
# States that this analysis treats as the corn belt.
corn_belt_states = {
    "MINNESOTA",
    "SOUTH DAKOTA",
    "NEBRASKA",
    "KANSAS",
    "IOWA",
    "WISCONSIN",
    "ILLINOIS",
    "MISSOURI",
    "INDIANA",
    "OHIO",
}

# Keep only the rows from those states and renumber the index from zero.
in_corn_belt = df["state_name"].isin(corn_belt_states)
df_corn = df[in_corn_belt].reset_index(drop=True)

In [None]:
# Preview the first five rows of the corn-belt subset.
df_corn.head(n=5)

Unnamed: 0,year,state_name,Value
0,2019,ILLINOIS,16000000
1,2021,ILLINOIS,50468000
2,2020,ILLINOIS,51135000
3,2019,ILLINOIS,37696000
4,2018,ILLINOIS,55882000


Currently, these values are in bushels. Let's convert them to thousands of metric tons (1000 MT).

In [None]:
# Bushels of corn per metric ton; dividing a bushel count by this gives metric tons.
BUSHELS_PER_METRIC_TON = 39.3679

# Recompute from the untouched bushel figures in `df` rather than rescaling
# df_corn["Value"] in place, so re-running this cell cannot divide the values a
# second time. The rows line up with df_corn because both apply the same state
# filter to `df` in the same order; to_numpy() sidesteps index alignment, since
# df_corn's index was reset.
bushels = df.loc[df["state_name"].isin(corn_belt_states), "Value"]
df_corn["Value"] = bushels.to_numpy() / BUSHELS_PER_METRIC_TON / 1000  # unit: 1000 metric tons

In [None]:
# Preview the converted frame so the rescaled "Value" column is visible; showing
# only the state_name column would not reveal the result of the conversion.
df_corn.head()

Unnamed: 0,year,state_name,Value
0,2019,ILLINOIS,406.422491
1,2021,ILLINOIS,1281.958144
2,2020,ILLINOIS,1298.900881
3,2019,ILLINOIS,957.53139
4,2018,ILLINOIS,1419.481354
