In [183]:
import urllib.request
from urllib.error import HTTPError
from requests.utils import requote_uri
import requests

# Retrieve NASS API key from environment variables (you have to get your own)
import os
my_NASS_API_key = os.getenv('NASS_API_KEY')

class c_usda_quick_stats:

    def __init__(self):

        # Set the USDA QuickStats API key, API base URL, and output file path where CSV files will be written. 

        # self.api_key = 'PASTE_YOUR_API_KEY_HERE'
        self.api_key = 'A9430A01-0F9A-38AC-9904-987B1A9D6D17'

        self.base_url_api_get = 'http://quickstats.nass.usda.gov/api/api_GET/?key=' \
                                + self.api_key + '&'

    def get_data(self, parameters, file_path, file_name):

        # Call the api_GET api with the specified parameters. 
        # Write the CSV data to the specified output file.

        # Create the full URL and retrieve the data from the Quick Stats server.
        
        full_url = self.base_url_api_get + parameters        
        print(full_url)

        try:
            s_result = urllib.request.urlopen(full_url)
            # print(type(s_result))
            print(s_result.status, s_result.reason)
            # print(s_result.status_code)
            s_text = s_result.read().decode('utf-8')

            # Create the output file and write the CSV data records to the file.

            s_file_name = file_path + file_name
            o_file = open(s_file_name, "w", encoding="utf8")
            o_file.write(s_text)
            o_file.close()
        except HTTPError as error:
            print(error.code, error.reason)
        except requests.exceptions.RequestException as e:
            print(f"An error occurred while fetching the data: {e}")
        except ValueError as e:
            print(f"Failed to parse the response data: {e}")
        except:
            print(f"Failed because of unknown exception; perhaps the USDA NASS site is down")

In [211]:
import sys
import urllib.parse

output_dir = './'


parameters =    'source_desc=SURVEY' +  \
                '&sector_desc=CROPS' + \
                '&commodity_desc=WHEAT' + \
                '&statisticcat_desc=YIELD' + \
                '&geographic_level=STATE' + \
                '&agg_level_desc=COUNTY' + \
                '&state_name=IDAHO' + \
                '&state_name=MICHIGAN' + \
                '&state_name=INDIANA' + \
                '&state_name=KENTUCKY' + \
                '&state_name=OHIO' + \
                '&state_name=ILLNOIS' + \
                '&state_name=WISCONSIN' + \
                '&state_name=MARYLAND' + \
                '&state_name=DELAWARE' + \
                '&state_name=TENNESSEE' + \
                '&state_name=CALIFORNIA' + \
                '&' + urllib.parse.quote('state_name=NEW YORK') + \
                '&state_name=ALABAMA' + \
                '&' + urllib.parse.quote('state_name=NEW JERSEY') + \
                '&state_name=WASHINGTON' + \
                '&state_name=VIRGINIA' + \
                '&year__GE=1990' + \
                '&year__LE=2022' + \
                '&format=CSV'

stats = c_usda_quick_stats()

#                 '&' + urllib.parse.quote('group_desc=WHEAT - YIELD, MEASURED IN BU / ACRE') + \
#                 '&state_name=MONTANA' + \
#                '&state_name=IDAHO' + \
#                '&state_name=NORTH DAKOTA' + \
# Including curr_timestamp() into file name to keep outputs separated during development/exploration
stats.get_data(parameters, output_dir, f'wheat_yield_data_1990_2022.csv')

http://quickstats.nass.usda.gov/api/api_GET/?key=A9430A01-0F9A-38AC-9904-987B1A9D6D17&source_desc=SURVEY&sector_desc=CROPS&commodity_desc=WHEAT&statisticcat_desc=YIELD&geographic_level=STATE&agg_level_desc=COUNTY&state_name=IDAHO&state_name=MICHIGAN&state_name=INDIANA&state_name=KENTUCKY&state_name=OHIO&state_name=ILLNOIS&state_name=WISCONSIN&state_name=MARYLAND&state_name=DELAWARE&state_name=TENNESSEE&state_name=CALIFORNIA&state_name%3DNEW%20YORK&state_name=ALABAMA&state_name%3DNEW%20JERSEY&state_name=WASHINGTON&state_name=VIRGINIA&year__GE=1990&year__LE=2022&format=CSV
200 OK


In [215]:
import pandas as pd


df_2 = pd.read_csv("wheat_yield_data_1990_2022.csv")
print(len(df_2))
df_2[['short_desc']].drop_duplicates()


df_winter = df_2[df_2['short_desc'] == "WHEAT, WINTER - YIELD, MEASURED IN BU / ACRE"]

bad_county_names = ['OTHER COUNTIES', 'OTHER (COMBINED) COUNTIES']
df_winter = df_winter[~df_winter.county_name.isin(bad_county_names)]
df_winter = df_winter[df_winter['Value'] != 0]

winter_wheat = df_winter[['year','state_name','county_name','Value']]
print(winter_wheat)
winter_wheat.to_csv("./winter_wheat_yield.csv", index=False)

df2 = df_winter[['state_name','county_name']].drop_duplicates()
print(len(df2))

yield_sum = df_winter.groupby(['county_name', 'state_name'])['Value'].sum().reset_index()

# Sort the yield sums in descending order to get the top yielders overall
#top_yielders = yield_sum.sort_values(ascending=True)

yield_sum.sort_values('Value',ascending=True, inplace=True)
print(yield_sum)
yield_sum.to_csv("./top_yielding_counties.csv", index=False)

46432
       year state_name county_name  Value
22495  2003    ALABAMA     COLBERT   41.0
22496  2000    ALABAMA     COLBERT   54.0
22497  1999    ALABAMA     COLBERT   46.0
22498  1998    ALABAMA     COLBERT   39.0
22499  1996    ALABAMA     COLBERT   47.0
...     ...        ...         ...    ...
39434  1994  WISCONSIN    WAUKESHA   47.7
39435  1993  WISCONSIN    WAUKESHA   33.5
39436  1992  WISCONSIN    WAUKESHA   35.7
39437  1991  WISCONSIN    WAUKESHA   38.7
39438  1990  WISCONSIN    WAUKESHA   51.1

[15408 rows x 4 columns]
726
     county_name state_name   Value
403         LUCE   MICHIGAN    29.0
478        MOORE  TENNESSEE    30.0
324      JACKSON  TENNESSEE    30.0
581  SAINT CLAIR    ALABAMA    30.0
383        LEWIS  TENNESSEE    31.0
..           ...        ...     ...
1            ADA      IDAHO  3272.4
53       BINGHAM      IDAHO  3484.6
95        CANYON      IDAHO  3717.5
336       JEROME      IDAHO  3798.8
663   TWIN FALLS      IDAHO  3948.3

[726 rows x 3 columns]


In [None]:
from geopy.geocoders import Nominatim
import pandas as pd 

def geocode_county(state, county):
    geolocator = Nominatim(user_agent="county_geocoder")
    location = geolocator.geocode(county + ", " + state + ", USA")
    if location:
        return location.longitude, location.latitude
    else:
        print('no lat-lon found for ', state, county)
        return None, None
    
df_winter_wheat = pd.read_csv("winter_wheat_yield.csv")

df_winter_wheat['lon'] = df_winter_wheat.apply(lambda x: geocode_county(x['state_name'], x['county_name'])[0], axis=1)
df_winter_wheat['lat'] = df_winter_wheat.apply(lambda x: geocode_county(x['state_name'], x['county_name'])[1], axis=1)

print(df_winter_wheat)