In [None]:
import csv
import os
from datetime import datetime, timedelta
import pandas as pd
import requests
import logging

AGRIMET_DATA_DIR = "d:/Websites/AgWaterAPI/agrimet/histEtSummaries"

cropCodes = {
    "ALFA": "Alfalfa",
    "ALFM": "Alfalfa (Mean)",
    "ALFN": "Alfalfa (New Plant)",
    "ALFP": "Alfalfa (Peak)",
    "APPL": "Apples",
    "ASPA": "Asparagus",
    "BEAN": "Dry Beans",
    "BEET": "Sugar Beets",
    "BLGR": "Bluegrass Seed",
    "BLUB": "Blueberries",
    "BROC": "Broccoli",
    "CABG": "Cabbage",
    "CGRP": "Concord Grapes",
    "CHRY": "Cherries",
    "CRAN": "Cranberries",
    "CRTS": "Carrot Seed",
    "FCRN": "Field Corn",
    "GARL": "Garlic",
    "GRSD": "Grass Seed",
    "HAYP": "Fescue Grass Hay (Peak Daily Consumptive Use for Mature Grass Hay)",
    "HAYM": "Fescue Grass Hay (Mean Annual Use with 3 Seasonal Cuttings)",
    "HOPS": "Hops",
    "LAWN": "Lawn",
    "LILY": "Easter Lilies",
    "MELN": "Melons",
    "NMNT": "New Mint",
    "ONYN": "Onion",
    "ORCH": "Orchards",
    "PAST": "Pasture",
    "PEAR": "Pears",
    "PEAS": "Peas",
    "PECH": "Peaches",
    "POP1": "First Year Poplar Trees",
    "POP2": "Second Year Poplar Trees",
    "POP3": "Third Year + Poplar Trees",
    "POTA": "Potatoes",
    "POTS": "Potatoes (Shepody)",
    "PPMT": "Peppermint",
    "RAPE": "Rapeseed (Canola)",
    "SAFL": "Safflower",
    "SPMT": "Spearmint",
    "SBAR": "Spring Barley",
    "SBRY": "Strawberry",
    "SCRN": "Sweet Corn",
    "SGRN": "Spring Grain",
    "SPMT": "Spearmint",
    "TBER": "Trailing Berries",
    "WGRN": "Winter Grain",
    "WGRP": "Wine Grape"
}

etCols = [
        "Daily Penman ET (in)-4",
        "Daily Penman ET (in)-3",
        "Daily Penman ET (in)-2",
        "Daily Penman ET (in)-1",
        "Daily Penman ET (in)"
    ]


def get_crop_water_use(station):
    """
    Retrieves the past five days of Crop ET for the given station (all crops for that station).
    """
    try:
        url = f"https://www.usbr.gov/pn/agrimet/chart/{station}ch.txt"

        response = requests.get(url)
        response.raise_for_status()
        content = response.text
        # Split the content into lines and filter out comment lines (starting with #)
        data = [line for line in content.splitlines() if line.strip() and not line.strip().startswith('#')]

        data = data[12:]  # Skip the header line s
        # Extract every other line, starting with the first
        data = data[::2]

        # Split each line using '*' as a delimiter and strip whitespace from each part
        data = [line.split('*') for line in data]
        data = [line[1:] for line in data]

        # for each line, flatten the line
        # and split by whitespace to get individual data points
        _data = []
        for line in data:
            line = ' '.join(line).strip()
            _data.append(line.split())

        # Optionally, convert to a pandas DataFrame for structured data
        df = pd.DataFrame(_data, columns= ['CropCode','Start Date', 'Daily Penman ET (in)-4',
                                    'Daily Penman ET (in)-3', 'Daily Penman ET (in)-2',
                                    'Daily Penman ET (in)-1', 'Daily Penman ET (in)',
                                    'Cover Date', 'Term Date', 'Sum ET (in)', '7 Day Use', '14 Day Use'],
                                     )
        cropNames = df['CropCode'].map(cropCodes).fillna(df['CropCode'])  # Map crop codes to names
        df['Name'] = cropNames    

        #print(df)
       
        crops = []
        for index, row in df.iterrows():
            _crop = {}
            _crop['cropCode'] = row['CropCode']
            _crop['name'] = row['Name']
            _crop['startDate'] = row['Start Date']
            _crop['coverDate'] = row['Cover Date']
            _crop['termDate'] = row['Term Date']
            _crop['sumET'] = row['Sum ET (in)']
            _crop['7DayUse'] = row['7 Day Use']
            _crop['14DayUse'] = row['14 Day Use']
            crops.append(_crop)
            
        dates = []
        today = datetime.today()
        for i in range(4, -1, -1):
            date = today - timedelta(days=i)
            dates.append(date.strftime('%m/%d'))            
         
        #print(dates)
           
        # create chart data
        dfHistET = pd.DataFrame()
        stationETSummaries = get_station_summary_data(station.lower(), dates[0], dates[4])
        if stationETSummaries:
            # the stationETSummaries data to the dataframe, appending the string "histET" to each column name
            #print(stationETSummaries['crops'])
            cropETs = []
            for crop in stationETSummaries['crops']:
                # find the crop in the 'data' array of dictionaries
                data = stationETSummaries['data']
        
                cropETs = [item[crop] for item in data if crop in item and item[crop] is not None]
        
        
                #for item in data:
                #    if crop in item:
                #        if item['APPL'] is not None:                 
                #            print(crop, stationETSummaries['data'][crop])
                if len(cropETs) < 5:
                    # If there are not enough data points, fill with None
                    #cropETs = cropETs + [None] * (5 - len(cropETs))
                    print(f"Not enough data for {crop}, filling with None")
                dfHistET[crop + " histET"] = cropETs
                
        #print(df['APPL_histET'])

        # each column in the data we are gnerating  is a crop, each row (observation)  is a date
        chartData = []
        for day in range(0,5):
            rowData = {}
            rowData['Date'] = dates[day]  #.strftime('%m/%d') #.toLocaleDateString('en-US', { month: '2-digit', day: '2-digit' });
            for index, row in df.iterrows():  # cwuData.current.forEach(crop => {
                cropName = row['Name']
                _etCol = etCols[day]
                et = row[_etCol]
                rowData[cropName] = et

            chartData.append(rowData)
            
        # Convert DataFrame to JSON
        #logger.info(chartData)
        content = df.to_json(orient='records')
            
        return { 'crops': crops, 'dates': dates, 'cwuData': chartData}
    
    except Exception as e:
        return {}


def get_station_summary_data(station_id, start_date, end_date):
    """
    Retrieve summary ET data for a station within a specified date range.
    
    Args:
        station_id (str): The station ID (e.g., 'abei', 'bfgi')
        start_date (str or date): Start date in 'MM/DD' format or date object
        end_date (str or date): End date in 'MM/DD' format or date object
        summaries_dir (str): Directory containing the summary CSV files
        
    Returns:
        Dict: Dictionary containing:
            - 'station_id': The station identifier
            - 'date_range': Tuple of (start_date, end_date)
            - 'crops': List of available crop types
            - 'data': List of dictionaries, each containing date and crop ET values
            - 'metadata': Dictionary with file metadata (if available)
            
    Raises:
        FileNotFoundError: If the station summary file doesn't exist
        ValueError: If date format is invalid
        
    Example:
        >>> data = get_station_summary_data('abei', '06/01', '06/10')
        >>> print(f"Found {len(data['data'])} days of data")
        >>> for day in data['data']:
        ...     print(f"{day['DATE']}: ALFM={day['ALFM']}, BEET={day['BEET']}")
    """
    logger = logging.getLogger(__name__)   
    # Construct file path
    summary_file = os.path.join(AGRIMET_DATA_DIR, f"{station_id}_summary.csv")

    if not os.path.exists(summary_file):
        logger.error(f"Summary file not found: {summary_file}")
        raise FileNotFoundError(f"Summary file not found: {summary_file}")
    
    # Parse date inputs
    if isinstance(start_date, str):
        start_month, start_day = map(int, start_date.split('/'))
    else:
        start_month, start_day = start_date.month, start_date.day
        
    if isinstance(end_date, str):
        end_month, end_day = map(int, end_date.split('/'))
    else:
        end_month, end_day = end_date.month, end_date.day
    
    # Read the CSV file
    metadata = {}
    crops = []
    data_rows = []
    
    with open(summary_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        
        # Parse metadata from comment lines and find data start
        data_start_idx = 0
        for i, line in enumerate(lines):
            line = line.strip()
            if line.startswith('#'):
                if ',' in line:
                    key, value = line[1:].split(',', 1)
                    metadata[key.strip()] = value.strip()
            elif line and not line.startswith('#'):
                # Found the header line
                data_start_idx = i
                break
        
        # Parse header and data
        csv_reader = csv.DictReader(lines[data_start_idx:])
        crops = [col for col in csv_reader.fieldnames if col != 'DATE']
        
        # Filter data by date range
        for row in csv_reader:
            row_date = row['DATE']
            if not row_date:
                continue
                
            try:
                month, day = map(int, row_date.split('/'))
                
                # Check if date is in range (handle year boundary)
                in_range = False
                if start_month <= end_month:
                    # Same year range (e.g., 06/01 to 08/15)
                    if start_month <= month <= end_month:
                        if month == start_month and day < start_day:
                            continue
                        if month == end_month and day > end_day:
                            continue
                        in_range = True
                else:
                    # Cross year boundary (e.g., 11/15 to 03/15)
                    if month >= start_month or month <= end_month:
                        if month == start_month and day < start_day:
                            continue
                        if month == end_month and day > end_day:
                            continue
                        in_range = True
                
                if in_range:
                    # Clean up the row data - convert empty strings to None
                    clean_row = {'DATE': row_date}
                    for crop in crops:
                        value = row.get(crop, '').strip()
                        if value and value != '':
                            try:
                                clean_row[crop] = float(value)
                            except ValueError:
                                clean_row[crop] = None
                        else:
                            clean_row[crop] = None
                    data_rows.append(clean_row)
                    
            except ValueError:
                logger.warning(f"Invalid date format in row: {row_date}, skipping row.")
                # Skip rows with invalid date format
                continue
    
    return {
        'station_id': station_id,
        'date_range': (start_date, end_date),
        'crops': crops,
        'data': data_rows,
        'metadata': metadata
    }

get_crop_water_use('crvo')

{}

In [26]:
res = get_crop_water_use('crvo')
print(res)

['07/10', '07/11', '07/12', '07/13', '07/14']
{}


In [22]:
data = [{'DATE': '06/01', 'ALFM': 0.166, 'ALFP': 0.194, 'APPL': 0.185, 'BETS': 0.119, 'BLUB': 0.194, 'CBBG': 0.128, 'FCRN': 0.076, 'GRSD': 0.152, 'HZLN': 0.217, 'LAWN': 0.156, 'PEAS': 0.185, 'POP1': 0.084, 'POP2': 0.145, 'POP3': 0.202, 'POTA': 0.118, 'PPMT': 0.182, 'SBRY': 0.185, 'SGRN': 0.194, 'SPNC': 0.116, 'SQSH': 0.081, 'TBER': 0.192, 'WGRN': 0.194, 'WGRP': 0.124}]
#_data = next((item for item in data if item["name"] == "APPL"), None)


for item in data:
    if 'APPL' in item:
        if item['APPL'] is not None:
            print(f"APPL: {item['APPL']}")
            break


APPL: 0.185


In [4]:
import pandas as pd
def agrimet_locations():    
    """
    Returns a list of Agrimet locations in CSV format.
    """    
    # Define the URL for the Agrimet locations CSV
    agrimet_locations_url = "https://www.usbr.gov/pn/agrimet/location.csv"
    df = pd.read_csv(agrimet_locations_url, skiprows=1)  # Read the CSV file into a DataFrame
    print(df.head())  # Print the first few rows of the DataFrame for debugging


agrimet_locations()

  siteid                                        description state   latitude  \
0   abei                    Aberdeen, Idaho Weather Station    ID  42.953333   
1   acki           INL - Blackfoot,  Idaho  Weather Station    ID  43.189850   
2   afty             Afton, Wyoming AgriMet Weather Station    WY  42.733330   
3   agko  Agency Lake Ranch, Oregon AgriMet Weather Station    OR  42.565270   
4   ahti              Ashton, Idaho AgriMet Weather Station    ID  44.025000   

   longitude  elevation     timezone    install  horizontal_datum  \
0 -112.82667   1341.000  US/Mountain  3/20/1991               NaN   
1 -112.33320   1377.696  US/Mountain        NaN               NaN   
2 -110.93583   1892.810  US/Mountain  11/1/1980               NaN   
3 -121.98250   1264.920   US/Pacific   5/3/2000               NaN   
4 -111.46666   1615.440  US/Mountain   6/1/1987               NaN   

  vertical_datum  ...  elevation_method tz_offset  active_flag     type  \
0              m  ...       a