In [None]:
# download temp mean data for Global 2m temps (GFS Ensemble 0.5 Degree Bias-Corrected)
# doc: at https://nomads.ncep.noaa.gov/
# this will take multiple minutes
import os
import requests
import time
from bs4 import BeautifulSoup

# CONFIGURATION FOR FORECASTS TO DOWNLOAD (only last two days are available)
# MAKE SURE THERE IS NO TRAILING SLASH
base_urls = [
    "https://nomads.ncep.noaa.gov/pub/data/nccf/com/naefs/prod/gefs.20230726/00/pgrb2ap5_bc",
    "https://nomads.ncep.noaa.gov/pub/data/nccf/com/naefs/prod/gefs.20230727/00/pgrb2ap5_bc"
]

def download_file(url, folder_path):
    response = requests.get(url)
    file_name = os.path.basename(url)
    file_path = os.path.join(folder_path, file_name)
    with open(file_path, 'wb') as f:
        f.write(response.content)

def get_file_urls(base_url):
    response = requests.get(base_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    urls = []
    for link in soup.find_all('a'):
        href = link.get('href')
        if href and not href.startswith('../') and href.startswith('geavg.t00z.pgrb2a.0p50_bcf') and 'idx' not in href:
            urls.append(href)
    return urls

num_urls = len(base_urls)
cur_url_count = 0
for base_url in base_urls:
    folder_path = os.path.basename(os.path.normpath(
        base_url.split('/')[-3]
    ))
    print(folder_path)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    cur_url_count += 1
    print(f"Processing {cur_url_count} / {num_urls} folders to download GEFS, bias corrected, 2m ensemble mean temperatures")

    file_urls = get_file_urls(base_url)
    time.sleep(2)
    
    i = 0
    numfiles = len(file_urls)
    for file_url in file_urls:
        download_url = f"https://nomads.ncep.noaa.gov/cgi-bin/filter_gensbc.pl?dir=/{base_url.split('https://nomads.ncep.noaa.gov/pub/data/nccf/com/naefs/prod/')[-1]}&file={file_url}&var_TMP=on&lev_2_m_above_ground=on"
        i += 1
        print(f" - Downloading {i} / {numfiles} : {download_url}")
        download_file(download_url, folder_path)
        time.sleep(2)  # Pause for 2 seconds between downloads

In [19]:
# calculate averages for downloaded forecast data
## CONFIGURE TO FOLDER NAMES
folders = ["gefs.20230726", "gefs.20230727"]  # Add more folders if needed

import pygrib
import numpy as np
from datetime import datetime, timedelta
import os
import glob

def get_date_from_offset(date_string, offset_hours):
    date_format = "%Y%m%d"
    date = datetime.strptime(date_string, date_format)
    result_date = date + timedelta(hours=offset_hours)
    return result_date

def date_to_time_str(date_time):
    date_format = "%Y-%m-%d %H:%M:%S"
    time_str = date_time.strftime(date_format)
    return time_str

def date_to_day_str(date_time):
    date_format = "%m-%d-%Y"
    month_day_year = date_time.strftime(date_format)
    return month_day_year

def kelvin_to_celsius(kelvin_temperature):
    celsius_temperature = kelvin_temperature - 273.15
    return celsius_temperature

def get_average_temperature(file_path):
    grbs = pygrib.open(file_path)
    # first get the forecast avg temp for that time step
    forecast_temperatures = {}
    for grb in grbs:
        # there should only be one entry even this is a for loop
        date = grb.dataDate
        # hours offset from forecast
        forecastTime = grb.forecastTime
        # calculate the valid time for this data
        valid_time = get_date_from_offset(str(date), forecastTime)
        time_str = date_to_time_str(valid_time)
            
        # each step is in units of hours, with intervals of every 3 hours
        # this is a forecast so the first hour is missing (only in analysis data)
        # there should only be one value
        if '2 metre temperature' in grb.name:
            avg = kelvin_to_celsius(grb.average)
            forecast_temperatures[valid_time] = avg
    grbs.close()
    return forecast_temperatures

def process_files_in_folder(folder_path):
    file_list = sorted(glob.glob(os.path.join(folder_path, "*")))
    num_files = len(file_list)
    print(f"--   Processing {num_files} files...")
    last_date = None
    max_temp = None
    averages_for_folder = []
    forecast_temperatures = {}
    for file_path in file_list:
        # get average temperature for step (should only be one time step)
        forecast_temperatures.update(get_average_temperature(file_path))
    
    # calculate the average for each day
    last_day_str = None
    day_averages = []
    last_avg = None
    for valid_time, avg in forecast_temperatures.items():
        # handle case for first observation
        if last_day_str is None:
            last_day_str = date_to_day_str(valid_time)

        day_str = date_to_day_str(valid_time)
        if day_str == last_day_str:
            # time step in same day
            day_averages.append(avg)
        else:
            # time step goes to next day
            # print out the previous day's average first
            day_average = np.average(day_averages)
            if last_avg == None:
                last_avg = day_average
            day_over_day_delta = day_average - last_avg
            print(f"Global 2m temp average (C) for {last_day_str}: {day_average:5.3f} (day over day delta: {day_over_day_delta:5.3f})")
            # clear the list for the new day
            day_averages = []
            # change the last_day
            last_day_str = day_str
            # add the current day's average to the list
            day_averages.append(avg)
            last_avg = avg

for folder in folders:
    folder_path = os.path.join(".", folder)  # Adjust the path if 
    print(f"-- Processing {folder_path}")
    process_files_in_folder(folder_path)

-- Processing ./gefs.20230726
--   Processing 96 files...
Global 2m temp average (C) for 07-26-2023: 8.496 (day over day delta: 0.000)
Global 2m temp average (C) for 07-27-2023: 8.565 (day over day delta: 0.347)
Global 2m temp average (C) for 07-28-2023: 8.568 (day over day delta: 0.180)
Global 2m temp average (C) for 07-29-2023: 8.280 (day over day delta: 0.171)
Global 2m temp average (C) for 07-30-2023: 8.160 (day over day delta: 0.233)
Global 2m temp average (C) for 07-31-2023: 8.113 (day over day delta: 0.299)
Global 2m temp average (C) for 08-01-2023: 8.224 (day over day delta: 0.337)
Global 2m temp average (C) for 08-02-2023: 8.393 (day over day delta: 0.361)
Global 2m temp average (C) for 08-03-2023: 8.540 (day over day delta: 0.317)
Global 2m temp average (C) for 08-04-2023: 8.674 (day over day delta: 0.322)
Global 2m temp average (C) for 08-05-2023: 8.687 (day over day delta: 0.284)
Global 2m temp average (C) for 08-06-2023: 8.655 (day over day delta: 0.290)
Global 2m temp ave