In [14]:
def create_extent(lat, lon, lat_distance=1, lon_distance=1):
    southwest_lat = lat - lat_distance
    southwest_lon = lon - lon_distance
    northeast_lat = lat + lat_distance
    northeast_lon = lon + lon_distance
    extent = f"{southwest_lat},{southwest_lon},{northeast_lat},{northeast_lon}"
    return extent, southwest_lat, southwest_lon, northeast_lat, northeast_lon

In [15]:
import requests


def get_fips_code(lat, lon):
    """
    Get the FIPS code for state and county when provided coordinates.

    Parameters:
    lat (float): Latitude of the location.
    lon (float): Longitude of the location.

    Returns:
    dict: A dictionary containing the state and county FIPS codes.
    """
    url = "https://geo.fcc.gov/api/census/block/find"
    params = {"latitude": lat, "longitude": lon, "format": "json"}

    response = requests.get(url, params=params)
    data = response.json()
    if "County" in data and "State" in data:
        fips_codes = {
            "state_fips": data["State"]["FIPS"],
            "county_fips": data["County"]["FIPS"],
        }
        return fips_codes
    else:
        raise ValueError("Could not retrieve FIPS codes for the provided coordinates.")


# Example usage
coordinates = (34.05, -118.55)  # Example coordinates (latitude, longitude)
fips_codes = get_fips_code(*coordinates)
print(
    f"State FIPS: {fips_codes['state_fips']}, County FIPS: {fips_codes['county_fips']}"
)

State FIPS: 06, County FIPS: 06037


In [18]:
from geopy.distance import geodesic


def get_nearest_noaa_weather_station_id(user_location, start_date, end_date):
    # this function returns the nearest weather station id based on the user location
    # for weather stations with data between start_date and end_date for stations with GHCND dataset temperature data
    # user_location: tuple of latitude and longitude
    # start_date: start date in the format 'YYYY-MM-DD'
    # end_date: end date in the format 'YYYY-MM-DD'
    # returns: nearest weather station id

    #######################################################
    # get nearest weather station based on fips for the county
    #######################################################
    fips_code = get_fips_code(*user_location)["county_fips"]
    stations_url = "https://www.ncei.noaa.gov/cdo-web/api/v2/stations"
    #user_location = (34.05, -118.25)  # Example coordinates (latitude, longitude)

    fips_code = get_fips_code(*user_location)["county_fips"]
    print(fips_code)
    # ext, sw_lat, sw_lon, ne_lat, ne_lon = create_extent(user_location[0], user_location[1])
    # Get NOAA stations metadata
    stations_url = "https://www.ncei.noaa.gov/cdo-web/api/v2/stations"
    headers = {"token": "XeVRiGIYmbnDCzzMCnjWCioSDjUYZXPi"}

    params = {
        "locationid": "FIPS:" + str(fips_code),
        # ensure the stations have temperature data
        "datacategoryid": "TEMP",
        "limit": 500,  # Adjust as needed
        # ensure the stations have temperature data
        "datasetid": "GHCND",  # Daily Summaries
        "startdate": start_date,
        "enddate": end_date,
        "units": "metric",
    }

    response = requests.get(stations_url, headers=headers, params=params)
    stations = response.json()["results"]
    print(stations)
    ##########################################################
    # get nearest station based on distance from user location
    ##########################################################
    extent = create_extent(user_location[0], user_location[1])
    params = {
        "extent": extent,
        # ensure the stations have temperature data
        "datacategoryid": "TEMP",
        "limit": 500,  # Adjust as needed
        # ensure the stations have temperature data
        "datasetid": "GHCND",  # Daily Summaries
        "startdate": start_date,
        "enddate": end_date,
        "units": "metric",
    }

    response = requests.get(stations_url, headers=headers, params=params, timeout=10)
    extent_stations = response.json()["results"]
    print('foo')
    print(extent_stations)
    stations = stations + extent_stations
    # Calculate the distance to each station

    # Calculate distances and sort by proximity
    distances = []
    for station in stations:
        station_location = (station["latitude"], station["longitude"])
        distance = geodesic(user_location, station_location).km
        station_dict = {
            "id": station["id"],
            "name": station["name"],
            "elevation": station["elevation"],
            "location": station_location,
            "distance": distance,
        }
        distances.append(station_dict)

    # Sort by distance and get the nearest 10 stations
    # nearest_stations_fips = sorted(distances, key=lambda x: x[3])[:20]
    distances = [i for n, i in enumerate(distances) if i not in distances[n + 1 :]]

    nearest_stations = sorted(distances, key=lambda x: x["distance"])[:20]
    nearest_station = nearest_stations[0]
    print(nearest_station)
    print(
        f"Nearest Station ID: {nearest_station['id']}, Name: {nearest_station['name']}, Distance: {nearest_station['distance']:.2f} km"
    )

    return nearest_stations

In [21]:
user_location = (
    37.547247,
    -121.994669
)

# if command takes longer than 10 seconds, terminate and retry up to 3 times
weather_station_data = None
retries = 0
try:
    weather_station_data = get_nearest_noaa_weather_station_id(
        user_location, "2023-01-01", "2023-01-02"
    )
except requests.exceptions.RequestException as e:
    print(e)
    retries += 1
    if retries == 3:
        print(
            "Failed to get the nearest weather station ID - sometimes NOAA goes down."
        )


"""
# Example coordinates (latitude, longitude) los angeles
get_nearest_noaa_weather_station_id(user_location, "2023-01-01", "2023-01-02")
"""

06001
[{'elevation': 94.5, 'mindate': '1893-01-01', 'maxdate': '2024-04-30', 'latitude': 37.8744, 'name': 'BERKELEY, CA US', 'datacoverage': 0.9251, 'id': 'GHCND:USC00040693', 'elevationUnit': 'METERS', 'longitude': -122.2605}, {'elevation': 11.6, 'mindate': '1996-06-01', 'maxdate': '2025-02-08', 'latitude': 37.5422, 'name': 'FREMONT, CA US', 'datacoverage': 0.9772, 'id': 'GHCND:USC00043244', 'elevationUnit': 'METERS', 'longitude': -122.0158}, {'elevation': 161.2, 'mindate': '1903-01-01', 'maxdate': '2025-01-13', 'latitude': 37.68194, 'name': 'LIVERMORE, CA US', 'datacoverage': 0.9853, 'id': 'GHCND:USC00044997', 'elevationUnit': 'METERS', 'longitude': -121.75139}, {'elevation': 9.1, 'mindate': '1970-10-01', 'maxdate': '2025-02-08', 'latitude': 37.7983, 'name': 'OAKLAND MUSEUM, CA US', 'datacoverage': 0.9347, 'id': 'GHCND:USC00046336', 'elevationUnit': 'METERS', 'longitude': -122.2642}, {'elevation': 18.6, 'mindate': '1955-02-01', 'maxdate': '2025-02-08', 'latitude': 37.79589, 'name': '

'\n# Example coordinates (latitude, longitude) los angeles\nget_nearest_noaa_weather_station_id(user_location, "2023-01-01", "2023-01-02")\n'

In [None]:
def get_noaa_weather_data(station_id, start_date, end_date):
    data_url = "https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND"
    headers = {"token": "XeVRiGIYmbnDCzzMCnjWCioSDjUYZXPi"}
    params = {
        # "datatypeid": "GHCND",
        "stationid": station_id,
        # "locationid": "ZIP:28801",
        "startdate": start_date,
        "enddate": end_date,
        "limit": 10,  # Adjust as needed
        "datatypeid": "TMAX,TMIN",  # Maximum temperature
        "units": "metric",
    }
    response = requests.get(data_url, headers=headers, params=params)
    print(response)
    print(response.url)
    data = response.json().get("results", [])
    return data


dat = get_noaa_weather_data("GHCND:USW00003171", "2024-11-01", "2021-01-02")

<Response [200]>
https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&stationid=GHCND%3AUSW00093134&startdate=2021-01-01&enddate=2021-01-02&limit=10&datatypeid=TMAX%2CTMIN&units=metric


In [20]:
# turn into pd dataframe
import pandas as pd

df = pd.DataFrame(
    get_noaa_weather_data(weather_station_data[0]["id"], "2024-09-01", "2024-09-09")
)
print(df)

<Response [200]>
https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&stationid=GHCND%3AUSW00093134&startdate=2024-09-01&enddate=2024-09-09&limit=10&datatypeid=TMAX%2CTMIN&units=metric
                  date datatype            station attributes  value
0  2024-09-01T00:00:00     TMAX  GHCND:USW00093134       ,,D,   30.0
1  2024-09-01T00:00:00     TMIN  GHCND:USW00093134       ,,D,   17.2
2  2024-09-02T00:00:00     TMAX  GHCND:USW00093134       ,,D,   31.1
3  2024-09-02T00:00:00     TMIN  GHCND:USW00093134       ,,D,   18.3
4  2024-09-03T00:00:00     TMAX  GHCND:USW00093134       ,,D,   32.2
5  2024-09-03T00:00:00     TMIN  GHCND:USW00093134       ,,D,   18.9
6  2024-09-04T00:00:00     TMAX  GHCND:USW00093134       ,,D,   34.4
7  2024-09-04T00:00:00     TMIN  GHCND:USW00093134       ,,D,   18.9
8  2024-09-05T00:00:00     TMAX  GHCND:USW00093134       ,,D,   38.9
9  2024-09-05T00:00:00     TMIN  GHCND:USW00093134       ,,D,   22.2
