In [2]:
import os
import requests
import pandas as pd
from datetime import datetime, timedelta, timezone
from dotenv import load_dotenv

# Root URL of the data.go.kr VilageFcstInfoService_2.0 open API; endpoint names
# (e.g. "getVilageFcst") are appended to it when a request URL is built.
BASE_URL = "https://apis.data.go.kr/1360000/VilageFcstInfoService_2.0"
# Forecast base times as zero-padded "HHMM" strings, kept in ascending order.
# pick_latest_vilage_base() chooses among these by plain string comparison.
VILAGE_BASE_TIMES = ["0200", "0500", "0800", "1100", "1400", "1700", "2000", "2300"]

def load_service_key():
    """Return the API service key stored in the ``KMA_SERVICE_KEY`` variable.

    ``load_dotenv()`` is called first, then the environment is read.

    Returns:
        The non-empty value of ``KMA_SERVICE_KEY``.

    Raises:
        ValueError: If ``KMA_SERVICE_KEY`` is unset or empty.
    """
    load_dotenv()
    service_key = os.environ.get("KMA_SERVICE_KEY")
    if service_key:
        return service_key
    raise ValueError("KMA_SERVICE_KEY가 .env에서 안 읽힘. .env 위치/키 이름 확인!")

def pick_latest_vilage_base(dt_kst=None, publish_delay_minutes=10):
    """Pick the most recent forecast base date/time that should already be published.

    Args:
        dt_kst: Reference moment. ``None`` means "now" in KST (UTC+9). A
            timezone-aware datetime is converted to KST; a naive datetime is
            taken as already being KST wall-clock time.
        publish_delay_minutes: Minutes to wait after a base time before it is
            considered available. The KMA API guide lists availability about
            10 minutes after each base time, so asking for e.g. "0200" at
            02:05 would request a forecast that is not out yet. Pass ``0`` to
            select a base time the instant the clock reaches it.

    Returns:
        Tuple ``(base_date, base_time)`` of strings formatted ``"YYYYMMDD"``
        and ``"HHMM"``.
    """
    kst = timezone(timedelta(hours=9))
    if dt_kst is None:
        dt_kst = datetime.now(kst)
    elif dt_kst.tzinfo is not None:
        # Normalize aware datetimes so strftime below yields KST wall-clock time.
        dt_kst = dt_kst.astimezone(kst)

    # Shift the clock back by the publication delay; crossing midnight is
    # handled by the datetime arithmetic itself.
    effective = dt_kst - timedelta(minutes=publish_delay_minutes)
    hm = effective.strftime("%H%M")

    # Zero-padded "HHMM" strings compare correctly as plain strings.
    published = [t for t in VILAGE_BASE_TIMES if t <= hm]
    if published:
        return effective.strftime("%Y%m%d"), max(published)

    # Earlier than the first base time of the day: use the previous day's last one.
    previous_day = effective - timedelta(days=1)
    return previous_day.strftime("%Y%m%d"), max(VILAGE_BASE_TIMES)

def _as_item_list(body):
    """Return the ``items.item`` payload of a response body as a list.

    Anything other than a mapping under ``items`` (for example an empty string
    or ``None``) is treated as "no items", and a single item delivered as a
    dict is wrapped in a one-element list.
    """
    items = body.get("items") if isinstance(body, dict) else None
    if not isinstance(items, dict):
        return []
    item = items.get("item")
    if not item:
        return []
    if isinstance(item, dict):
        return [item]
    return list(item)


def fetch_vilage_items_all(service_key, nx, ny, base_date, base_time, num_rows=1000, max_pages=50):
    """Download every ``getVilageFcst`` item for one grid cell and base time.

    Pages are requested one after another until the reported ``totalCount`` is
    reached, a page comes back empty, or ``max_pages`` pages have been read.

    Args:
        service_key: API service key sent as the ``serviceKey`` parameter.
        nx, ny: Grid coordinates; converted with ``int()``.
        base_date: Forecast base date; sent as ``str(base_date)``.
        base_time: Forecast base time; sent as ``str(base_time)``.
        num_rows: Page size sent as ``numOfRows``.
        max_pages: Upper bound on the number of pages requested.

    Returns:
        List of item dicts in the order the API returned them.

    Raises:
        requests.HTTPError: Via ``raise_for_status()`` on an error HTTP status.
        ValueError: If the response body is not valid JSON.
        RuntimeError: If the response header's ``resultCode`` is not ``"00"``.
    """
    url = f"{BASE_URL}/getVilageFcst"
    all_items = []
    total_count = None

    for page in range(1, max_pages + 1):
        params = {
            "serviceKey": service_key,
            "numOfRows": num_rows,
            "pageNo": page,
            "dataType": "JSON",
            "base_date": str(base_date),
            "base_time": str(base_time),
            "nx": int(nx),
            "ny": int(ny),
        }
        r = requests.get(url, params=params, timeout=20)

        if r.status_code != 200:
            # r.url embeds the serviceKey query parameter, so print the endpoint
            # and the non-secret parameters instead of the full request URL.
            safe_params = {k: v for k, v in params.items() if k != "serviceKey"}
            print("HTTP", r.status_code)
            print("URL:", url, safe_params)
            print("BODY(head):", r.text[:400])
            r.raise_for_status()

        try:
            data = r.json()
        except ValueError as exc:
            # Surface the start of the body so a non-JSON reply (e.g. an
            # XML/HTML error page) is diagnosable from the exception itself.
            raise ValueError(f"API 응답이 JSON이 아님: {r.text[:200]}") from exc

        header = data.get("response", {}).get("header", {})
        if header.get("resultCode") != "00":
            raise RuntimeError(f"API 오류: {header.get('resultCode')} / {header.get('resultMsg')}")

        body = data.get("response", {}).get("body") or {}
        if total_count is None:
            total_count = body.get("totalCount")

        items = _as_item_list(body)
        all_items.extend(items)

        if total_count is not None and len(all_items) >= int(total_count):
            break
        if not items:
            break

    return all_items

def preprocess_items(items, airport_name, icao, nx, ny, base_date, base_time):
    """Turn raw forecast items into one row per forecast time with TMP and WSD.

    Only items whose ``category`` is ``"TMP"`` or ``"WSD"`` are kept. They are
    pivoted so each ``fcstDate`` + ``fcstTime`` becomes one row, with
    ``temp_c`` (from TMP) and ``wind_speed_ms`` (from WSD) as numeric columns.
    A category that is absent from the input yields an all-NaN column.

    Args:
        items: Iterable of item dicts with ``category``, ``fcstDate``,
            ``fcstTime`` and ``fcstValue`` keys.
        airport_name, icao, nx, ny, base_date, base_time: Values copied into
            constant columns of the result.

    Returns:
        DataFrame with columns 공항, ICAO, fcst_datetime, temp_c,
        wind_speed_ms, nx, ny, base_date, base_time. An empty, column-less
        DataFrame is returned when there are no items or none of them is a
        TMP/WSD item.
    """
    df = pd.DataFrame(items)
    if df.empty:
        return pd.DataFrame()

    df = df[df["category"].isin(["TMP", "WSD"])].copy()
    if df.empty:
        # Nothing to pivot; behave the same as for an empty item list.
        return pd.DataFrame()

    df["fcst_datetime"] = pd.to_datetime(df["fcstDate"] + df["fcstTime"], format="%Y%m%d%H%M")

    wide = (
        df.pivot_table(
            index=["fcst_datetime"],
            columns="category",
            values="fcstValue",
            aggfunc="first"
        )
        # Guarantee both value columns exist even if one category is missing.
        .reindex(columns=["TMP", "WSD"])
        .rename(columns={"TMP": "temp_c", "WSD": "wind_speed_ms"})
        # Drop the "category" label the pivot leaves on the columns axis.
        .rename_axis(columns=None)
        .reset_index()
    )

    # fcstValue arrives as text; unparseable values become NaN.
    wide["temp_c"] = pd.to_numeric(wide["temp_c"], errors="coerce")
    wide["wind_speed_ms"] = pd.to_numeric(wide["wind_speed_ms"], errors="coerce")

    wide["공항"] = airport_name
    wide["ICAO"] = icao
    wide["nx"] = nx
    wide["ny"] = ny
    wide["base_date"] = base_date
    wide["base_time"] = base_time

    return wide[["공항", "ICAO", "fcst_datetime", "temp_c", "wind_speed_ms", "nx", "ny", "base_date", "base_time"]]

# ICAO codes of the 15 airports processed when no explicit selection is given.
DEFAULT_AIRPORT_ICAO_CODES = (
    "RKJJ", "RKJK", "RKSS", "RKPK", "RKTN", "RKJB", "RKPS", "RKNY",
    "RKJY", "RKPU", "RKNW", "RKSI", "RKPC", "RKTU", "RKTH",
)


def build_airport_forecast_df(airport_csv_path="airport_nxny_map.csv", base_date=None, base_time=None,
                              icao_codes=None):
    """Fetch and combine the TMP/WSD forecast for every selected airport.

    Args:
        airport_csv_path: CSV with at least the columns 공항, ICAO, nx, ny.
        base_date: Forecast base date. If either ``base_date`` or
            ``base_time`` is ``None``, both are replaced by the result of
            ``pick_latest_vilage_base()``.
        base_time: Forecast base time (see ``base_date``).
        icao_codes: Iterable of ICAO codes to keep. ``None`` selects
            ``DEFAULT_AIRPORT_ICAO_CODES``.

    Returns:
        DataFrame of all airports' forecasts sorted by 공항 and
        fcst_datetime. If no airport produced any rows, an empty DataFrame
        carrying the expected column names is returned.

    Raises:
        ValueError: If the CSV lacks a required column (and whatever
            ``load_service_key()`` / the fetch step raise).
    """
    service_key = load_service_key()

    if base_date is None or base_time is None:
        base_date, base_time = pick_latest_vilage_base()

    ap = pd.read_csv(airport_csv_path)

    # Validate the schema before touching any column so a malformed CSV gets
    # the descriptive error rather than a bare KeyError.
    required = {"공항", "ICAO", "nx", "ny"}
    if not required.issubset(set(ap.columns)):
        raise ValueError(f"CSV 컬럼이 예상과 다름. 필요={required}, 실제={set(ap.columns)}")

    # Restrict to the airports of interest.
    wanted = DEFAULT_AIRPORT_ICAO_CODES if icao_codes is None else icao_codes
    ap = ap[ap["ICAO"].isin(list(wanted))]

    frames = []
    for airport_name, icao, raw_nx, raw_ny in zip(ap["공항"], ap["ICAO"], ap["nx"], ap["ny"]):
        nx = int(raw_nx)
        ny = int(raw_ny)

        items = fetch_vilage_items_all(service_key, nx, ny, base_date, base_time)
        df_one = preprocess_items(items, airport_name, icao, nx, ny, base_date, base_time)
        # Skip airports without data: an empty frame has none of the sort keys.
        if not df_one.empty:
            frames.append(df_one)

    if not frames:
        # pd.concat([]) raises; hand back an empty, correctly-labelled frame.
        return pd.DataFrame(
            columns=["공항", "ICAO", "fcst_datetime", "temp_c", "wind_speed_ms",
                     "nx", "ny", "base_date", "base_time"]
        )

    df_all = pd.concat(frames, ignore_index=True).sort_values(["공항", "fcst_datetime"]).reset_index(drop=True)
    return df_all

# Run: build the combined forecast table from the airport grid-mapping CSV
# and preview its first 30 rows.
df_all = build_airport_forecast_df(airport_csv_path="airport_nxny_map.csv")
display(df_all.head(n=30))

# Optional: save the result to CSV (uncomment to enable).
# df_all.to_csv("airport_tmp_wsd_forecast.csv", index=False, encoding="utf-8-sig")


category,공항,ICAO,fcst_datetime,temp_c,wind_speed_ms,nx,ny,base_date,base_time
0,광주,RKJJ,2026-01-26 15:00:00,2,3.3,57,74,20260126,1400
1,광주,RKJJ,2026-01-26 16:00:00,2,2.8,57,74,20260126,1400
2,광주,RKJJ,2026-01-26 17:00:00,1,1.6,57,74,20260126,1400
3,광주,RKJJ,2026-01-26 18:00:00,0,1.2,57,74,20260126,1400
4,광주,RKJJ,2026-01-26 19:00:00,-1,1.2,57,74,20260126,1400
5,광주,RKJJ,2026-01-26 20:00:00,-1,1.2,57,74,20260126,1400
6,광주,RKJJ,2026-01-26 21:00:00,-2,1.0,57,74,20260126,1400
7,광주,RKJJ,2026-01-26 22:00:00,-2,1.0,57,74,20260126,1400
8,광주,RKJJ,2026-01-26 23:00:00,-3,0.6,57,74,20260126,1400
9,광주,RKJJ,2026-01-27 00:00:00,-3,0.6,57,74,20260126,1400


In [3]:
# Assumes `df_all` is the final DataFrame produced by the previous cell.

# Columns kept for display; the nx / ny grid coordinates are left out.
cols = ["공항", "ICAO", "fcst_datetime", "temp_c", "wind_speed_ms", "base_date", "base_time"]

# Select those columns, order rows by airport then forecast time for
# readability, and renumber the index.
df_all = (
    df_all.loc[:, cols]
    .sort_values(by=["공항", "fcst_datetime"])
    .reset_index(drop=True)
)

display(df_all)


category,공항,ICAO,fcst_datetime,temp_c,wind_speed_ms,base_date,base_time
0,광주,RKJJ,2026-01-26 15:00:00,2,3.3,20260126,1400
1,광주,RKJJ,2026-01-26 16:00:00,2,2.8,20260126,1400
2,광주,RKJJ,2026-01-26 17:00:00,1,1.6,20260126,1400
3,광주,RKJJ,2026-01-26 18:00:00,0,1.2,20260126,1400
4,광주,RKJJ,2026-01-26 19:00:00,-1,1.2,20260126,1400
...,...,...,...,...,...,...,...
985,포항경주,RKTH,2026-01-29 12:00:00,2,2.0,20260126,1400
986,포항경주,RKTH,2026-01-29 15:00:00,4,2.0,20260126,1400
987,포항경주,RKTH,2026-01-29 18:00:00,0,2.0,20260126,1400
988,포항경주,RKTH,2026-01-29 21:00:00,-2,1.0,20260126,1400
