In [3]:
import chardet
import pandas as pd
import numpy as np
from scipy.spatial import distance
from pyproj import Transformer
from geopy.distance import geodesic


In [4]:
def open_with_auto_sensing_encoding_types(file_path):
    with open(file_path, 'rb') as f:
        rawdata = f.read()
        result = chardet.detect(rawdata)
        encoding_type = result['encoding']

    print(f"파일의 인코딩: {encoding_type}")
    # 감지된 인코딩으로 파일 읽기
    df = pd.read_csv(file_path, encoding=encoding_type)
    print(len(df))
    return df


def get_coord(df, x_name, y_name):
    """
    데이터프레임에서 x, y 좌표쌍을 추출하여 np.linalg.norm()을 바로 적용할 수 있는 넘파이 배열 반환.

    Parameters:
    df (pd.DataFrame): 입력 데이터프레임
    x_name (str): x좌표의 컬럼명
    y_name (str): y좌표의 컬럼명

    Returns:
    np.ndarray:
        - [[x1, y1], [x2, y2], ..., [xn, yn]] 형태의 넘파이 배열 (N, 2)
    """
    return df[[x_name, y_name]].dropna().reset_index(drop=True).to_numpy()  # NaN 제거 후 numpy 변환

def latlon_to_meters(lat1, lon1, lat2, lon2):
    """
    두 위경도 좌표 간의 거리를 미터(m) 단위로 변환하는 함수.

    Parameters:
    lat1, lon1 : 기준점 위도, 경도
    lat2, lon2 : 비교 대상 위도, 경도

    Returns:
    float: 두 좌표 간의 거리(m)
    """
    # 위도/경도 차이 계산
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    
    # 평균 위도 (경도 변환 시 필요)
    avg_lat = np.radians((lat1 + lat2) / 2)

    # 위도, 경도를 미터로 변환
    lat_meters = dlat * 111320  # 위도 1도 ≈ 111.32 km
    lon_meters = dlon * (111320 * np.cos(avg_lat))  # 경도는 cos(위도) 보정 필요

    return np.sqrt(lat_meters**2 + lon_meters**2)  # 피타고라스 정리 적용

def closest_dist(target_coord, coords):
    '''
    가장 가까운 좌표의 인덱스와 거리 계산
    
    Parameters:
    target_coord (np.ndarray): 단일 좌표 (예: [x, y])
    coords (np.ndarray): 여러 개의 좌표를 가진 2D 넘파이 배열 (N, 2)
    
    Returns:
    idx (int): 가장 가까운 위치의 인덱스 번호
    dist (float): 가장 가까운 위치까지의 거리
    '''
    # 거리 계산 (위경도를 실제 거리(m)로 변환)
    distances = latlon_to_meters(coords[:, 0], coords[:, 1], target_coord[0], target_coord[1])

    # 최소 거리 인덱스 찾기
    idx = np.argmin(distances)

    # 가장 가까운 거리값 반환
    dist = distances[idx]

    return idx, dist

def calculate_min_distance(distance_df, reference_coords, data_name):
    '''
    각 행의 좌표에 대해 reference_coords에서 가장 가까운 좌표의 인덱스와 거리 계산
    
    Parameters:
    distance_df (pd.DataFrame): 좌표 데이터를 포함한 데이터프레임
    reference_coords (np.ndarray): 비교할 대상 (예: 유치원 위치 배열)
    data_name (str): 결과 컬럼명에서 사용할 데이터 이름 (예: 'kindergarden')

    Returns:
    pd.DataFrame:
        target좌표 | {data_name}_최소거리 | {data_name}_idx 형태의 데이터프레임 반환
    '''
    # 결과 저장 리스트
    results = []

    # 각 좌표에 대해 거리 계산
    for target in distance_df.to_numpy():
        idx, dist = closest_dist(target, reference_coords)  # 최소거리, 인덱스 계산
        results.append([dist, idx])

    distance_df
    # 결과 데이터프레임 생성
    result_df = pd.DataFrame(results, columns=[f"{data_name}_최소거리(m)", f"{data_name}_idx"])
    
    return result_df

def get_name(target_df, src_df, index_col, name_col):
    '''
    인덱스 값(시설 인덱스)을 실제 시설명으로 변환.

    Parameters:
    target_df (pd.DataFrame): 변환할 데이터프레임 (예: distance_results)
    src_df (pd.DataFrame): 원본 데이터프레임 (예: 시설 데이터셋)
    index_col (str): target_df에서 시설 인덱스를 저장한 컬럼명 (예: 'highschool_coord_idx')
    name_col (str): src_df에서 시설명을 저장한 컬럼명 (예: '시설명')

    Returns:
    pd.DataFrame: 인덱스 값이 시설명으로 변환된 새로운 데이터프레임
    '''
    # 인덱스를 기반으로 시설명 매핑
    index_to_name = src_df[name_col]  # src_df의 시설명 컬럼을 가져옴

    # 시설 인덱스를 시설명으로 변환 (매핑)
    target_df[f"{index_col}_시설명"] = target_df[index_col].map(index_to_name)

    return target_df


In [5]:
def get_distance_df(
        target_distance_df,
        public_df,
        x_colname,
        y_colname,
        place_colname,
        col_name,
):
    '''
    target_distance_df : 기준점 좌표가 있는 DataFrame
    public_df : 거리 측정 대상 DataFrame
    x_colname, y_colname : public_df의 경도, 위도 열
    place_colname : public_df의 시설명 열
    col_name : 결과 컬럼 접두사
    '''
    # 좌표 추출
    public_coords = get_coord(public_df, x_colname, y_colname)

    # 거리 계산 및 가장 가까운 시설 인덱스, 거리 포함
    public_distance_df = calculate_min_distance(target_distance_df, public_coords, col_name)

    # 가까운 시설명 붙이기 (컬럼: f"{col_name}_name")
    public_distance_df = get_name(public_distance_df, public_df, f"{col_name}_idx", place_colname)

    # 원본에 모든 정보 그대로 붙이기
    merged = pd.concat(
        [target_distance_df.reset_index(drop=True),
         public_distance_df.reset_index(drop=True)],
        axis=1
    )

    return merged


In [6]:

def count_nearby_facilities(kids_cafe_df, facility_df, x_colname, y_colname, place_colname, threshold_km=1.0, col_name='nearby_count'):
    # 필요한 컬럼만 추출 및 결측 제거
    facility_df = facility_df[[place_colname, y_colname, x_colname]].dropna()

    counts = []
    for _, cafe in kids_cafe_df.iterrows():
        cafe_location = (cafe[y_colname], cafe[x_colname])
        count = sum(
            geodesic(cafe_location, (fac[y_colname], fac[x_colname])).km <= threshold_km
            for _, fac in facility_df.iterrows()
        )
        counts.append(count)
    
    # counts를 DataFrame으로 만들어서 원본과 병합
    counts_df = pd.DataFrame({col_name: counts})
    merged_df = pd.concat([kids_cafe_df.reset_index(drop=True), counts_df], axis=1)
    
    return merged_df


In [7]:
final_df_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/target/final_targetV2.csv"
final_df = open_with_auto_sensing_encoding_types(final_df_path)
final_df

파일의 인코딩: utf-8
1765


Unnamed: 0.1,Unnamed: 0,행정동,center_lat,center_lon,주차장수,최소거리(m)
0,0,자양4동,37.529359,127.057131,0.0,0.0
1,1,자양4동,37.528462,127.058268,0.0,0.0
2,2,자양4동,37.529363,127.058263,0.0,0.0
3,3,자양4동,37.530265,127.058257,0.0,0.0
4,4,자양4동,37.531166,127.058252,0.0,0.0
...,...,...,...,...,...,...
1760,1760,광장동,37.554795,127.113589,0.0,0.0
1761,1761,광장동,37.555697,127.113584,0.0,0.0
1762,1762,광장동,37.556598,127.113580,0.0,0.0
1763,1763,광장동,37.557499,127.113575,0.0,0.0


In [10]:
final_coord_df = final_df[["center_lon", "center_lat"]]
final_coord_df.rename(columns={"center_lon" : "경도", "center_lat" : "위도"}, inplace=True)
final_coord_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_coord_df.rename(columns={"center_lon" : "경도", "center_lat" : "위도"}, inplace=True)


Unnamed: 0,경도,위도
0,127.057131,37.529359
1,127.058268,37.528462
2,127.058263,37.529363
3,127.058257,37.530265
4,127.058252,37.531166
...,...,...
1760,127.113589,37.554795
1761,127.113584,37.555697
1762,127.113580,37.556598
1763,127.113575,37.557499


In [11]:
subway_station_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/my지하철.csv"
bus_station_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/광진구_버스정류장_최종.csv"
bad_place_merged_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/bad_placed_merged.csv"
child_protection_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/서울특별시_광진구_어린이보호구역_정리본.csv"
accident_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/광진구사고다발구역.csv"
parking_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/주차장병합.csv"

subway_station_df = open_with_auto_sensing_encoding_types(subway_station_path)
bus_station_df = open_with_auto_sensing_encoding_types(bus_station_path)
bad_place_merged_df = open_with_auto_sensing_encoding_types(bad_place_merged_path)
child_protection_df = open_with_auto_sensing_encoding_types(child_protection_path)
accident_df = open_with_auto_sensing_encoding_types(accident_path)
parking_df = open_with_auto_sensing_encoding_types(parking_path)

파일의 인코딩: utf-8
11
파일의 인코딩: utf-8
156
파일의 인코딩: utf-8
538
파일의 인코딩: UTF-8-SIG
66
파일의 인코딩: UTF-8-SIG
7
파일의 인코딩: UTF-8-SIG
348


In [12]:
# 지하철 정류장거리, 개수
appended_df = get_distance_df(
        target_distance_df = final_coord_df,
        public_df = subway_station_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '역이름',
        col_name = 'subway',
)
appended_df = count_nearby_facilities(
        kids_cafe_df = appended_df,
        facility_df = subway_station_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '역이름',
        threshold_km = 0.5,
)
appended_df.head(3)


Unnamed: 0,경도,위도,subway_최소거리(m),subway_idx,subway_idx_시설명,nearby_count
0,127.057131,37.529359,943.105681,10,자양,0
1,127.058268,37.528462,827.611715,10,자양,0
2,127.058263,37.529363,818.040769,10,자양,0


In [13]:
# 버스 정류장거리, 개수
appended_df = get_distance_df(
        target_distance_df = appended_df,
        public_df = bus_station_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '시설명',
        col_name = 'bus',
)
appended_df = count_nearby_facilities(
        kids_cafe_df = appended_df,
        facility_df = bus_station_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '시설명',
        threshold_km = 0.5,
)
appended_df.head(3)


Unnamed: 0,경도,위도,subway_최소거리(m),subway_idx,subway_idx_시설명,nearby_count,bus_최소거리(m),bus_idx,bus_idx_시설명,nearby_count.1
0,127.057131,37.529359,943.105681,10,자양,0,658.159095,92,영동대교북단,0
1,127.058268,37.528462,827.611715,10,자양,0,636.487794,92,영동대교북단,0
2,127.058263,37.529363,818.040769,10,자양,0,584.165586,92,영동대교북단,0


In [14]:
# 유흥업 정류장거리, 개수
appended_df = get_distance_df(
        target_distance_df = appended_df,
        public_df = bad_place_merged_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '사업장명',
        col_name = 'badplace',
)
appended_df = count_nearby_facilities(
        kids_cafe_df = appended_df,
        facility_df = bad_place_merged_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '사업장명',
        threshold_km = 0.5,
)
appended_df.head(3)

Unnamed: 0,경도,위도,subway_최소거리(m),subway_idx,subway_idx_시설명,nearby_count,bus_최소거리(m),bus_idx,bus_idx_시설명,nearby_count.1,badplace_최소거리(m),badplace_idx,badplace_idx_시설명,nearby_count.2
0,127.057131,37.529359,943.105681,10,자양,0,658.159095,92,영동대교북단,0,678.06918,347,체리,0
1,127.058268,37.528462,827.611715,10,자양,0,636.487794,92,영동대교북단,0,580.961545,459,비에이치씨한강뚝섬3호점,0
2,127.058263,37.529363,818.040769,10,자양,0,584.165586,92,영동대교북단,0,566.033273,459,비에이치씨한강뚝섬3호점,1


In [15]:
# 어린이보호구역, 최소거리 및 300미터 내 개수
appended_df = get_distance_df(
        target_distance_df = appended_df,
        public_df = child_protection_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '시설명',
        col_name = 'child_protection',
)
appended_df = count_nearby_facilities(
        kids_cafe_df = appended_df,
        facility_df = child_protection_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '시설명',
        threshold_km = 0.3,
)
appended_df.head(3)

Unnamed: 0,경도,위도,subway_최소거리(m),subway_idx,subway_idx_시설명,nearby_count,bus_최소거리(m),bus_idx,bus_idx_시설명,nearby_count.1,badplace_최소거리(m),badplace_idx,badplace_idx_시설명,nearby_count.2,child_protection_최소거리(m),child_protection_idx,child_protection_idx_시설명,nearby_count.3
0,127.057131,37.529359,943.105681,10,자양,0,658.159095,92,영동대교북단,0,678.06918,347,체리,0,1283.829239,28,인애유치원,0
1,127.058268,37.528462,827.611715,10,자양,0,636.487794,92,영동대교북단,0,580.961545,459,비에이치씨한강뚝섬3호점,0,1178.187396,28,인애유치원,0
2,127.058263,37.529363,818.040769,10,자양,0,584.165586,92,영동대교북단,0,566.033273,459,비에이치씨한강뚝섬3호점,1,1161.653603,28,인애유치원,0


In [16]:
# 사고다발 지역과의 최소거리 및 200미터 내 개수
appended_df = get_distance_df(
        target_distance_df = appended_df,
        public_df = accident_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '지점명',
        col_name = 'accident',
)
appended_df = count_nearby_facilities(
        kids_cafe_df = appended_df,
        facility_df = accident_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '지점명',
        threshold_km = 0.2,
)
appended_df.head(3)

Unnamed: 0,경도,위도,subway_최소거리(m),subway_idx,subway_idx_시설명,nearby_count,bus_최소거리(m),bus_idx,bus_idx_시설명,nearby_count.1,...,badplace_idx_시설명,nearby_count.2,child_protection_최소거리(m),child_protection_idx,child_protection_idx_시설명,nearby_count.3,accident_최소거리(m),accident_idx,accident_idx_시설명,nearby_count.4
0,127.057131,37.529359,943.105681,10,자양,0,658.159095,92,영동대교북단,0,...,체리,0,1283.829239,28,인애유치원,0,1148.133137,3,서울특별시 광진구 자양동(동부간선도로-강변북로램프(6) 부근),0
1,127.058268,37.528462,827.611715,10,자양,0,636.487794,92,영동대교북단,0,...,비에이치씨한강뚝섬3호점,0,1178.187396,28,인애유치원,0,1028.175732,3,서울특별시 광진구 자양동(동부간선도로-강변북로램프(6) 부근),0
2,127.058263,37.529363,818.040769,10,자양,0,584.165586,92,영동대교북단,0,...,비에이치씨한강뚝섬3호점,1,1161.653603,28,인애유치원,0,1022.444612,3,서울특별시 광진구 자양동(동부간선도로-강변북로램프(6) 부근),0


In [17]:
# 사고다발 지역과의 최소거리 및 200미터 내 개수
appended_df = get_distance_df(
        target_distance_df = appended_df,
        public_df = parking_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '유무료',
        col_name = 'parking'
)
appended_df = count_nearby_facilities(
        kids_cafe_df = appended_df,
        facility_df = parking_df,
        x_colname = '경도',
        y_colname = '위도',
        place_colname = '유무료',
        threshold_km = 0.3
)
appended_df.head(3)

Unnamed: 0,경도,위도,subway_최소거리(m),subway_idx,subway_idx_시설명,nearby_count,bus_최소거리(m),bus_idx,bus_idx_시설명,nearby_count.1,...,child_protection_idx_시설명,nearby_count.2,accident_최소거리(m),accident_idx,accident_idx_시설명,nearby_count.3,parking_최소거리(m),parking_idx,parking_idx_시설명,nearby_count.4
0,127.057131,37.529359,943.105681,10,자양,0,658.159095,92,영동대교북단,0,...,인애유치원,0,1148.133137,3,서울특별시 광진구 자양동(동부간선도로-강변북로램프(6) 부근),0,550.087297,267,유료,0
1,127.058268,37.528462,827.611715,10,자양,0,636.487794,92,영동대교북단,0,...,인애유치원,0,1028.175732,3,서울특별시 광진구 자양동(동부간선도로-강변북로램프(6) 부근),0,489.745395,268,유료,0
2,127.058263,37.529363,818.040769,10,자양,0,584.165586,92,영동대교북단,0,...,인애유치원,0,1022.444612,3,서울특별시 광진구 자양동(동부간선도로-강변북로램프(6) 부근),0,448.508335,268,유료,0


In [18]:
merged_df = pd.concat([appended_df.reset_index(drop=True),
                       final_df.reset_index(drop=True)], axis=1)
merged_df

Unnamed: 0.1,경도,위도,subway_최소거리(m),subway_idx,subway_idx_시설명,nearby_count,bus_최소거리(m),bus_idx,bus_idx_시설명,nearby_count.1,...,parking_최소거리(m),parking_idx,parking_idx_시설명,nearby_count.2,Unnamed: 0,행정동,center_lat,center_lon,주차장수,최소거리(m)
0,127.057131,37.529359,943.105681,10,자양,0,658.159095,92,영동대교북단,0,...,550.087297,267,유료,0,0,자양4동,37.529359,127.057131,0.0,0.0
1,127.058268,37.528462,827.611715,10,자양,0,636.487794,92,영동대교북단,0,...,489.745395,268,유료,0,1,자양4동,37.528462,127.058268,0.0,0.0
2,127.058263,37.529363,818.040769,10,자양,0,584.165586,92,영동대교북단,0,...,448.508335,268,유료,0,2,자양4동,37.529363,127.058263,0.0,0.0
3,127.058257,37.530265,812.868007,10,자양,0,533.568661,92,영동대교북단,0,...,412.046497,268,유료,0,3,자양4동,37.530265,127.058257,0.0,0.0
4,127.058252,37.531166,812.177464,10,자양,0,485.237012,92,영동대교북단,0,...,381.033788,267,유료,0,4,자양4동,37.531166,127.058252,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1760,127.113589,37.554795,1284.652258,8,광나루(장신대),0,463.036623,116,정보도서관,0,...,998.059694,317,유료,0,1760,광장동,37.554795,127.113589,0.0,0.0
1761,127.113584,37.555697,1315.304670,8,광나루(장신대),0,504.661967,116,정보도서관,0,...,1024.237918,317,유료,0,1761,광장동,37.555697,127.113584,0.0,0.0
1762,127.113580,37.556598,1347.980340,8,광나루(장신대),0,549.813943,116,정보도서관,0,...,1053.249009,317,유료,0,1762,광장동,37.556598,127.113580,0.0,0.0
1763,127.113575,37.557499,1382.535815,8,광나루(장신대),0,597.693837,116,정보도서관,0,...,1084.865723,317,유료,0,1763,광장동,37.557499,127.113575,0.0,0.0


In [19]:
# final_tile_df = merged_df[['경도', '위도', 'subway_최소거리(m)', 'nearby_count',
#                            'bus_최소거리(m)', 'nearby_count',
#                            'badplace_최소거리(m)', 'nearby_count',
#                            'child_protection_최소거리(m)', 'nearby_count',
#                            'accident_최소거리(m)', 'nearby_count',
#                            '주차장수', '최소거리(m)']]
# final_tile_df

In [21]:
merged_df.to_csv("/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/output/tile_per_datasV3.csv")