In [None]:
import requests
import pandas as pd
import re
import time

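# NOAA NCEI API token, sent as the "token" request header by get_noaa_station_id().
# Apply for an API token at https://www.ncdc.noaa.gov/cdo-web/token
# NOTE(review): the value below looks like a real credential committed to source;
# consider rotating it and loading the token from an environment variable instead.
# API_TOKEN = "XbyuERiTrXrJMbExiMRwzCNOZKwjQQep"

# HEADERS must be defined (here or elsewhere) before get_noaa_station_id() is called.
# HEADERS = {
#     "token": API_TOKEN
# }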

def get_noaa_station_id(lat, lon):
    """Look up a NOAA GHCND weather station ID near a coordinate.

    Queries the NOAA CDO ``stations`` endpoint for GHCND stations inside a
    bounding box of +/- 0.1 degrees around ``(lat, lon)`` and returns the ID
    of the first station listed in the response. No distance ranking is done
    here, so the result is "a station inside the box", not necessarily the
    nearest one.

    Relies on a module-level ``HEADERS`` mapping (carrying the API token)
    being defined before the call.

    Args:
        lat: Latitude of the point of interest, in decimal degrees.
        lon: Longitude of the point of interest, in decimal degrees.

    Returns:
        The ``id`` field of the first station in the response, or ``None``
        when the request fails, the response is not valid JSON, or no
        station is found inside the bounding box.
    """
    # Bounding box of +/- 0.1 degrees around the requested point.
    min_lat, max_lat = lat - 0.1, lat + 0.1
    min_lon, max_lon = lon - 0.1, lon + 0.1

    stations_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
    params = {
        "extent": f"{min_lat},{min_lon},{max_lat},{max_lon}",  # Search box
        "datasetid": "GHCND",  # Restrict to the GHCND dataset
        "limit": 5,  # Cap the number of returned stations
    }

    # A timeout keeps one stalled connection from hanging a whole batch run;
    # network errors are reported the same way as HTTP errors (None result).
    try:
        response = requests.get(
            stations_url, headers=HEADERS, params=params, timeout=30
        )
    except requests.RequestException as exc:
        print("NOAA API request failed:", exc)
        return None

    if response.status_code != 200:
        print("NOAA API request failed:", response.status_code, response.text)
        return None

    # A 200 response with a non-JSON body is treated as a failed request.
    try:
        data = response.json()
    except ValueError:
        print("NOAA API request failed:", response.status_code, response.text)
        return None

    # Covers both a missing "results" key and an empty result list, so the
    # indexing below cannot raise IndexError.
    stations = data.get("results") if isinstance(data, dict) else None
    if not stations:
        print("No suitable weather station found")
        return None

    print('Succeeded!')
    return stations[0]['id']  # First station listed inside the search box

# Base name (without the .csv extension) of the input observations file.
country = 'USA-NPN_individual_phenometrics_data'

# Load the observations and reduce them to the unique
# (location, lat, long) combinations, with lower-cased location names.
df = (
    pd.read_csv(f'{country}.csv')
    .rename(columns={'State': 'location', 'Latitude': 'lat', 'Longitude': 'long'})
    .assign(location=lambda frame: frame['location'].str.lower())
    .loc[:, ['location', 'lat', 'long']]
    .drop_duplicates()
)

# Maps each location name to a station ID with the 'GHCND:' prefix removed.
station_dict = {}

for _, entry in df.iterrows():
    place = entry['location']

    # One station per location: skip coordinates of already-resolved places.
    if place in station_dict:
        print(f'The ID of {place} has already been obtained.')
        continue

    print(f'Obtaining {place}\'s station ID....')
    station = get_noaa_station_id(entry['lat'], entry['long'])
    if station:
        station_dict[place] = station.replace('GHCND:', '')
    else:
        # The location stays unresolved, so a later row with different
        # coordinates for the same location will be tried again.
        print(f'The ID of {place} is none.')

    # Pause after every API lookup, successful or not.
    time.sleep(2)


# Write the location -> station mapping as a two-column CSV.
# NOTE(review): the output name says "korea" although the input is USA-NPN
# data -- confirm the file name is intended before relying on it.
station_df = pd.DataFrame({
    'location': list(station_dict.keys()),
    'STATION': list(station_dict.values()),
})
station_df.to_csv('korea_station_id.csv', index=False)