In [47]:
import pandas as pd

# Lines to keep; rows whose "호선" value is not listed here are discarded.
subways = [
    "1호선", "2호선", "3호선", "4호선", "5호선", "6호선", "7호선", "8호선", "9호선",
    "인천1호선", "경의중앙선", "신분당선", "공항철도1호선", "중앙선", "경춘선", "수인선",
]

# Load the raw station table (EUC-KR encoded), keep only the lines above,
# drop the "역사_ID" column and switch to short English column names.
df = (
    pd.read_csv("../data/subway.csv", encoding="euc-kr")
    .loc[lambda stations: stations["호선"].isin(subways)]
    .drop(columns="역사_ID")
    .rename(columns={"역사명": "name", "호선": "line", "위도": "lat", "경도": "lon"})
)

# Persist the cleaned table as UTF-8, without the index column.
df.to_csv("../data/seoul_subway.csv", encoding="utf-8", index=False)

In [48]:
import geopandas as gpd
from shapely.geometry import Point

# Read the dong boundary polygons (attribute table is EUC-KR encoded) and
# reproject them to EPSG:4326 so they share the CRS declared for the station
# points built below.
gdf = gpd.read_file("../data/LSMD.shp", encoding="euc-kr").to_crs(4326)

# Columns as written by the cell that derives this file from subway.csv:
# name, line, lat, lon.
# NOTE(review): a later cell overwrites this same path with extra columns and
# fewer rows, so re-running this cell out of order reads a different table.
df = pd.read_csv("../data/seoul_subway.csv", encoding="utf-8")

# Build all point geometries in one vectorised call (x = lon, y = lat)
# instead of constructing one shapely Point per row in a Python loop.
sub_gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df["lon"], df["lat"]), crs="EPSG:4326"
)

In [49]:
# Preview the first rows of the polygon frame after reprojection to EPSG:4326.
gdf.head()

Unnamed: 0,EMD_CD,COL_ADM_SE,EMD_NM,SGG_OID,geometry
0,11110103,11110,궁정동,1034,"POLYGON ((126.97031 37.58418, 126.97043 37.584..."
1,11110120,11110,신문로1가,1032,"POLYGON ((126.97185 37.5698, 126.97192 37.5698..."
2,11110167,11110,충신동,375,"POLYGON ((127.00856 37.57674, 127.00857 37.576..."
3,11110185,11110,홍지동,374,"POLYGON ((126.95399 37.60475, 126.95404 37.604..."
4,11110154,11110,장사동,368,"POLYGON ((126.9923 37.57001, 126.99247 37.5700..."


In [50]:
# Tag every station point with the dong polygon that contains it.
# - how="left" keeps stations that match no polygon; their EMD_* columns are NaN.
# - predicate="within" is the usual choice; "intersects" is the alternative,
#   e.g. for above-ground LRT stations.
stations_with_dong = sub_gdf.sjoin(
    gdf[["EMD_CD", "EMD_NM", "geometry"]],
    how="left",
    predicate="within",
)
# Resulting columns: name, line, lat, lon, geometry, index_right, EMD_CD, EMD_NM
stations_with_dong.head()

Unnamed: 0,name,line,lat,lon,geometry,index_right,EMD_CD,EMD_NM
0,남태령,4호선,37.463873,126.989134,POINT (126.98913 37.46387),425.0,11650101.0,방배동
1,사당,4호선,37.47653,126.981685,POINT (126.98168 37.47653),,,
2,총신대입구(이수),4호선,37.486263,126.981989,POINT (126.98199 37.48626),412.0,11590107.0,사당동
3,정자,신분당선,37.367098,127.108403,POINT (127.1084 37.3671),,,
4,판교,신분당선,37.394761,127.112217,POINT (127.11222 37.39476),,,


In [None]:
# Method A: simple but slow (acceptable while the number of dongs is small).
# For each polygon, collect the names of all polygons that touch it; every
# geometry is compared against the whole frame, so the cost is quadratic.
# NOTE(review): `touches` only reports polygons whose boundaries meet without
# their interiors overlapping; confirm the dataset has no slivers or overlaps
# between adjacent dongs, otherwise true neighbours are missed.
gdf["neighbors"] = [
    gdf.loc[gdf.touches(dong_shape), "EMD_NM"].tolist()
    for dong_shape in gdf["geometry"]
]

In [52]:
# (1) Optionally exclude stations that matched no dong polygon.
stations_with_dong = stations_with_dong.dropna(subset=["EMD_NM"])

# (2) Build the lookup tables for the neighbour lists stored on the polygon frame.
#     The lookup used below is keyed by EMD_CD rather than EMD_NM: a dong name
#     can occur in more than one district, and a name-keyed dict silently keeps
#     only the last polygon for each repeated name, so stations could receive
#     the neighbours of a different dong that happens to share the name.
neighbors_by_code = gdf.set_index("EMD_CD")["neighbors"].to_dict()
#     Name-keyed dict retained for any code that still reads it; it has the
#     collision problem described above, so prefer `neighbors_by_code`.
neighbor_dict = gdf.set_index("EMD_NM")["neighbors"].to_dict()

# (3) Attach the neighbour list to every station in one map() call.
#     After the left join EMD_CD may be float (NaN forces the upcast) while the
#     dict keys are ints; equal int/float values hash identically, so the
#     lookup still matches. Codes missing from the dict fall back to [].
stations_with_dong["adjacent_dongs"] = stations_with_dong["EMD_CD"].map(
    lambda code: neighbors_by_code.get(code, [])
)

# (Optional) tidy the column name.
# NOTE(review): after this rename the cell cannot be re-run as-is, because
# step (1) expects the EMD_NM column; re-run the spatial join cell first.
stations_with_dong = stations_with_dong.rename(columns={"EMD_NM": "dong"})

In [55]:
# Keep only the tabular station attributes; the join bookkeeping columns and
# the geometry are not exported.
subways = stations_with_dong.drop(columns=["geometry", "index_right", "EMD_CD"])

# NOTE(review): this overwrites the same file the station-loading cell reads,
# now with the extra columns `dong` and `adjacent_dongs` and without the
# stations dropped earlier. `adjacent_dongs` holds Python lists, which the CSV
# stores as their text representation; readers must parse it back.
subways.to_csv("../data/seoul_subway.csv", encoding="utf-8", index=False)
subways.head()

Unnamed: 0,name,line,lat,lon,dong,adjacent_dongs
0,남태령,4호선,37.463873,126.989134,방배동,"[우면동, 서초동, 반포동]"
2,총신대입구(이수),4호선,37.486263,126.981989,사당동,"[동작동, 상도동, 남현동]"
5,청계산입구,신분당선,37.447211,127.055664,신원동,"[내곡동, 원지동, 양재동]"
6,동작(현충원),4호선,37.502971,126.979306,동작동,"[흑석동, 사당동, 상도동]"
7,양재시민의숲(매헌),신분당선,37.470023,127.03842,양재동,"[원지동, 신원동, 우면동, 염곡동, 서초동]"
