# 1. 공통 상수 정의

In [18]:
import pandas as pd
import requests
import time
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# Kakao REST API key read from the environment; None when the variable is unset.
KAKAO_REST_API_KEY = os.getenv("KAKAO_REST_API_KEY")

# Authorization header sent with every Kakao Local API request in this file.
HEADERS = {
    "Authorization": f"KakaoAK {KAKAO_REST_API_KEY}"
}

# Kakao Local API endpoints: address search, category search, and
# coordinate -> region-code lookup.
ADDRESS_URL = "https://dapi.kakao.com/v2/local/search/address.json"
CATEGORY_URL = "https://dapi.kakao.com/v2/local/search/category.json"
COORD2REGION_URL = "https://dapi.kakao.com/v2/local/geo/coord2regioncode.json"

# Category group codes to collect, mapped to their Korean display names.
# The keys are sent as `category_group_code` to the category search endpoint.
CATEGORIES = {
    "CS2": "편의점",
    "MT1": "대형마트",
    "PK6": "주차장",
    "BK9": "은행",
    "FD6": "음식점",
    "CE7": "카페",
    "HP8": "병원",
    "PM9": "약국",
}

# CSV that collected POI rows are appended to (relative to the working directory).
OUTPUT_PATH = "../output/collect_seoul_legal_dong_poi.csv"
# Search radius passed to the category search
# (NOTE(review): meters per the Kakao Local API docs - confirm).
RADIUS = 800

# Report only whether the key was loaded. Never print the secret itself:
# notebook output is saved (and usually committed) together with the code.
print("KAKAO_REST_API_KEY loaded:", bool(KAKAO_REST_API_KEY))

[REDACTED: API 키가 셀 출력에 노출되어 제거함 — 해당 키는 폐기 후 재발급 필요]


In [19]:
def fetch_places(category, x, y, radius, timeout=10, max_page=45):
    """Collect all category-search documents around one coordinate.

    Pages through the Kakao category search endpoint until the response
    reports ``is_end``, a non-200 status is returned, or ``max_page`` is
    reached.

    Args:
        category: Category group code sent as ``category_group_code``.
        x: Longitude of the search center.
        y: Latitude of the search center.
        radius: Search radius sent as ``radius``.
        timeout: Seconds to wait for each HTTP response. Without a timeout
            a stalled connection would block the whole collection run.
        max_page: Highest page number to request
            (NOTE(review): 45 is the documented maximum for this endpoint -
            confirm against the current Kakao docs).

    Returns:
        A list of the raw ``documents`` dicts from every page fetched.
        Documents gathered before a failed page are still returned.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    rows = []

    for page in range(1, max_page + 1):
        res = requests.get(
            CATEGORY_URL,
            headers=HEADERS,
            params={
                "category_group_code": category,
                "x": x,
                "y": y,
                "radius": radius,
                "page": page,
                "size": 15,
            },
            timeout=timeout,
        )

        if res.status_code != 200:
            # Surface the failure instead of silently returning partial data;
            # an invalid key or exhausted quota would otherwise look like
            # "no places found".
            print(
                f"[fetch_places] HTTP {res.status_code} "
                f"(category={category}, x={x}, y={y}, page={page})"
            )
            break

        data = res.json()
        rows.extend(data.get("documents", []))

        # A missing `meta`/`is_end` is treated as the last page.
        if data.get("meta", {}).get("is_end", True):
            break

        # Short pause between pages to stay gentle on the API.
        time.sleep(0.05)

    return rows


# 2. 주소 → 좌표

In [20]:
def dong_to_coord(address, sleep=0.1, timeout=10):
    """Resolve an address string to the coordinates of its first match.

    Args:
        address: Query string sent to the Kakao address search endpoint.
        sleep: Seconds to pause after the request, as a simple throttle.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``(x, y)`` as floats taken from the first returned document, or
        ``(None, None)`` when the request fails or nothing matches.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    res = requests.get(
        ADDRESS_URL,
        headers=HEADERS,
        params={"query": address},
        timeout=timeout,
    )

    # Throttle after every request, not only successful ones, so a run of
    # failed or empty lookups cannot hit the API back-to-back.
    time.sleep(sleep)

    if res.status_code != 200:
        print(f"[dong_to_coord] HTTP {res.status_code} (address={address})")
        return None, None

    docs = res.json().get("documents", [])
    if not docs:
        return None, None

    first = docs[0]
    return float(first["x"]), float(first["y"])


# 3. 좌표 → 법정동 코드

In [21]:
def coord_to_bcode_once(x, y, sleep=0.05, timeout=10):
    """Look up the region code of type "B" for one coordinate.

    Args:
        x: Longitude sent as ``x``.
        y: Latitude sent as ``y``.
        sleep: Seconds to pause after the request, as a simple throttle.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``code`` of the first document whose ``region_type`` is ``"B"``,
        or ``None`` when the request fails or no such document exists.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    res = requests.get(
        COORD2REGION_URL,
        headers=HEADERS,
        params={"x": x, "y": y, "input_coord": "WGS84"},
        timeout=timeout,
    )

    # Throttle on every path, including error responses, since this is
    # called once per grid point.
    time.sleep(sleep)

    if res.status_code != 200:
        print(f"[coord_to_bcode_once] HTTP {res.status_code} (x={x}, y={y})")
        return None

    for doc in res.json().get("documents", []):
        if doc.get("region_type") == "B":
            return doc.get("code")

    return None


# 4. Grid 생성 함수

In [22]:
def generate_grid(lon, lat, step=0.005, grid_range=0.01):
    """Build a square lattice of (lon, lat) points around a center.

    Args:
        lon: Longitude of the center point.
        lat: Latitude of the center point.
        step: Spacing between neighbouring grid points.
        grid_range: How far the grid extends from the center on each axis.

    Returns:
        A list of ``(x, y)`` tuples, ordered with longitude as the outer
        axis and latitude as the inner axis.
    """
    # The same offsets apply to both axes.
    offsets = frange(-grid_range, grid_range, step)

    points = []
    for dx in offsets:
        grid_x = lon + dx
        for dy in offsets:
            points.append((grid_x, lat + dy))
    return points


def frange(start, stop, step):
    """Return evenly spaced values from ``start`` to ``stop`` inclusive.

    Each value is computed as ``start + i * step`` rather than by repeated
    addition, so accumulated floating-point error cannot push the final
    value past ``stop`` and silently drop the endpoint.

    Args:
        start: First value of the sequence.
        stop: Upper bound; included when it lies on the step lattice.
        step: Positive spacing between values.

    Returns:
        A list of values rounded to 6 decimal places. Empty when
        ``stop < start``.

    Raises:
        ValueError: If ``step`` is not positive (this previously caused an
            endless loop).
    """
    if stop < start:
        return []
    if step <= 0:
        raise ValueError("step must be positive")

    # The small epsilon absorbs division error such as 0.3 / 0.1 == 2.999...
    steps = int((stop - start) / step + 1e-9)
    return [round(start + i * step, 6) for i in range(steps + 1)]


# 5. 카테고리 POI 수집

In [31]:
dong_df = pd.read_csv("../raw/legal_dong.csv")
# Only legal-dong codes strictly greater than this one are processed.
RESUME_FROM_BCODE = "1171010800"

# 1. Keep only Seoul rows that have both a district and a dong name.
seoul_dong_df = dong_df[
    (dong_df["시도명"] == "서울특별시") &
    (dong_df["시군구명"].notna()) &
    (dong_df["읍면동명"].notna())
].copy()

# 2. Compare codes as strings and process them in ascending order.
seoul_dong_df["법정동코드"] = seoul_dong_df["법정동코드"].astype(str)
seoul_dong_df = seoul_dong_df.sort_values("법정동코드")

# 3. Apply the resume point.
seoul_dong_df = seoul_dong_df[
    seoul_dong_df["법정동코드"] > RESUME_FROM_BCODE
]

OUTPUT_COLUMNS = [
    "LEGALDONG_CD", "POI_ID", "POI_NM",
    "CL_CD", "CL_NM",
    "LC_LO", "LC_LA",
    "ORIGIN_NM", "BASE_DE",
]

# (legal-dong code, POI id) pairs already written, used to skip duplicates.
seen = set()

if os.path.exists(OUTPUT_PATH):
    # Read both key columns as strings. If pandas inferred the types, a
    # single blank POI_ID would turn the column into floats and the keys
    # would become "123.0", never matching the string ids from the API.
    done_df = pd.read_csv(
        OUTPUT_PATH,
        dtype={"LEGALDONG_CD": str, "POI_ID": str},
    )
    done_df = done_df.dropna(subset=["LEGALDONG_CD", "POI_ID"])
    seen = set(zip(done_df["LEGALDONG_CD"], done_df["POI_ID"]))
else:
    # First run: make sure the target directory exists, then write the
    # header so later appends can omit it.
    os.makedirs(os.path.dirname(OUTPUT_PATH) or ".", exist_ok=True)
    pd.DataFrame(columns=OUTPUT_COLUMNS).to_csv(OUTPUT_PATH, index=False)

for _, row in seoul_dong_df.iterrows():
    bcode = row["법정동코드"]
    address = f"서울특별시 {row['시군구명']} {row['읍면동명']}"
    center_x, center_y = dong_to_coord(address)

    if center_x is None:
        continue

    for gx, gy in generate_grid(center_x, center_y):
        grid_bcode = coord_to_bcode_once(gx, gy)

        # Only search from grid points that fall inside the current dong.
        if grid_bcode != bcode:
            continue

        rows_to_save = []

        for cat in CATEGORIES:
            for p in fetch_places(cat, gx, gy, RADIUS):
                key = (grid_bcode, p["id"])
                if key in seen:
                    continue

                seen.add(key)

                rows_to_save.append({
                    "LEGALDONG_CD": grid_bcode,
                    "POI_ID": p["id"],
                    "POI_NM": p["place_name"],
                    "CL_CD": p["category_group_code"],
                    "CL_NM": p["category_name"],
                    "LC_LO": p["x"],
                    "LC_LA": p["y"],
                    "ORIGIN_NM": "KAKAO_LOCAL_API",
                    "BASE_DE": time.strftime("%Y%m%d"),
                })

        # Append per grid point so an interrupted run keeps what it has
        # collected so far.
        if rows_to_save:
            pd.DataFrame(rows_to_save, columns=OUTPUT_COLUMNS).to_csv(
                OUTPUT_PATH,
                mode="a",
                header=False,
                index=False,
            )

        time.sleep(0.2)


### 현재 파일 path 조회

In [29]:
import os
# Evaluate the current working directory; the value is not stored, and as a
# non-final expression it is not displayed by the notebook either.
os.getcwd()
# Load the collected POI CSV from an absolute, machine-specific path.
df = pd.read_csv("/Users/choiwoojin/2026/quad-s/output/collect_seoul_legal_dong_poi.csv")
# Show the last row, e.g. to see which legal-dong code was written most recently.
df.tail(1)

Unnamed: 0,LEGALDONG_CD,POI_ID,POI_NM,CL_CD,CL_NM,LC_LO,LC_LA,ORIGIN_NM,BASE_DE
210379,1171010800,1367242854,송파새길약국,PM9,"의료,건강 > 약국",127.123116,37.483501,KAKAO_LOCAL_API,20260206


## legal_dong.csv랑 법정동 지명 매핑

In [2]:
import pandas as pd

# POI 데이터
# Collected POI rows; the legal-dong code is read as a string so it can be
# joined against the code table without type mismatches.
poi_df = pd.read_csv(
    "/Users/choiwoojin/2026/quad-s/output/collect_seoul_legal_dong_poi.csv",
    dtype={"LEGALDONG_CD": str},
)

# Legal-dong code table, with its Korean column names mapped to the output
# schema in the same step.
dong_df = pd.read_csv(
    "/Users/choiwoojin/2026/quad-s/raw/legal_dong.csv",
    dtype={"법정동코드": str},
).rename(
    columns={
        "법정동코드": "LEGALDONG_CD",
        "시도명": "SIDO_NM",
        "시군구명": "SIGUNGU_NM",
        "읍면동명": "LEGALDONG_NM",
    }
)

# Drop abolished legal dongs: keep only rows without a deletion date, when
# the table carries that column at all.
if "삭제일자" in dong_df:
    dong_df = dong_df.loc[dong_df["삭제일자"].isna()]

# Left join so every POI row is kept even if its code has no match.
merged_df = pd.merge(
    poi_df,
    dong_df[["LEGALDONG_CD", "SIDO_NM", "SIGUNGU_NM", "LEGALDONG_NM"]],
    on="LEGALDONG_CD",
    how="left",
)

# Build the space-separated "sido sigungu dong" address string.
sido_and_sigungu = merged_df["SIDO_NM"] + " " + merged_df["SIGUNGU_NM"]
merged_df["LEGALDONG_ADDR"] = sido_and_sigungu + " " + merged_df["LEGALDONG_NM"]

merged_df.to_csv(
    "/Users/choiwoojin/2026/quad-s/output/collect_seoul_legal_dong_poi_v1.1.csv",
    index=False,
    encoding="utf-8-sig",
)