In [105]:
import pandas as pd 
import requests
from tqdm import tqdm 
import os
import geopandas as gpd 
from shapely.geometry import Point, Polygon, MultiPolygon 
import geopandas as gpd
import pyproj
from shapely.ops import transform

In [106]:
data_url = "../서울시_자동차_도로_수집/make_file/(표면)_서울열선_광진도로.csv"
data_encoding = "UTF-8"

In [107]:
data = pd.read_csv(data_url, encoding=data_encoding, low_memory=False)

In [108]:
data.columns

Index(['도로명', '열선', '도로 종류', '시작점_위도', '시작점_경도', '종료점_위도', '종료점_경도', '중앙점_위도',
       '중앙점_경도', '도로_길이', '행정동', '도로폭', '도로규모', '행정구역', '고도의_차이', '경사각',
       '최근접_시설들_거리', '최근접_시설의_평균거리', '2019_평균_온도', '2020_평균_온도', '2021_평균_온도',
       '2022_평균_온도', '2023_평균_온도', '2024_평균_온도', '종합_평균_기온'],
      dtype='object')

In [109]:
dong_url = "./data/BND_ADM_DONG_PG/BND_ADM_DONG_PG.shp"
dong_encoding = "CP949"

In [110]:
# 행정동 경계 데이터 로드
gdf = gpd.read_file(dong_url, encoding=dong_encoding)

In [111]:
gdf.columns

Index(['BASE_DATE', 'ADM_CD', 'ADM_NM', 'geometry'], dtype='object')

In [112]:
gdf.head()

Unnamed: 0,BASE_DATE,ADM_CD,ADM_NM,geometry
0,20240630,24010510,충장동,"POLYGON ((192642.941 284096.719, 192642.09 284..."
1,20240630,24010540,동명동,"POLYGON ((193270.372 284034.819, 193270.194 28..."
2,20240630,36680400,안좌면,"MULTIPOLYGON (((113338.23 243273.44, 113342.37..."
3,20240630,36680410,팔금면,"MULTIPOLYGON (((124370.24 249372.92, 124367.44..."
4,20240630,36680420,암태면,"MULTIPOLYGON (((124912 256632.46, 124912.4 256..."


In [113]:
# 좌표 변환 함수 정의
project = pyproj.Transformer.from_crs("EPSG:4326", "EPSG:5186", always_xy=True).transform

In [114]:
def get_admin_dong_name(lon, lat):
    # 위경도 → EPSG:5186 좌표로 변환
    point = transform(project, Point(lon, lat))
    # print("변환된 좌표:", point)∑

    for _, row in gdf.iterrows():
        geom = row['geometry']
        if isinstance(geom, Polygon):ß∑
            if geom.contains(point):
                return row['ADM_NM']ß
        elif isinstance(geom, MultiPolygon):
            for poly in geom.geoms:ß∑
                if poly.contains(point):
                    return row['ADM_NM']
    return '해당 위치의 행정동을 찾을 수 없습니다.'


In [115]:
data.columns

Index(['도로명', '열선', '도로 종류', '시작점_위도', '시작점_경도', '종료점_위도', '종료점_경도', '중앙점_위도',
       '중앙점_경도', '도로_길이', '행정동', '도로폭', '도로규모', '행정구역', '고도의_차이', '경사각',
       '최근접_시설들_거리', '최근접_시설의_평균거리', '2019_평균_온도', '2020_평균_온도', '2021_평균_온도',
       '2022_평균_온도', '2023_평균_온도', '2024_평균_온도', '종합_평균_기온'],
      dtype='object')

In [116]:
dong_list = []

for i in tqdm(range(len(data))) :
    dong_name = get_admin_dong_name(data.loc[i]['중앙점_경도'], data.loc[i]['중앙점_위도'])
    dong_list.append(dong_name)

100%|███████████████████████████████████████| 7212/7212 [05:52<00:00, 20.47it/s]


In [127]:
try :
    data = data.drop(columns='행정동')
    print("삭제 완료")
except KeyError as e:
    print(e)

"['행정동'] not found in axis"


In [128]:
data['행정동'] = dong_list

In [129]:
data['행정동'].value_counts()

행정동
군자동     718
자양4동    672
중곡4동    558
화양동     527
자양2동    499
       ... 
이화동       1
대방동       1
신정7동      1
전농1동      1
일원1동      1
Name: count, Length: 223, dtype: int64

In [130]:
try :
    data.to_csv(data_url, encoding=data_encoding, index=False)
    print(f"{data_url} 덮어쓰기 완료.")
except OSError as e:
    print(e)

../서울시_자동차_도로_수집/make_file/(표면)_서울열선_광진도로.csv 덮어쓰기 완료.
