In [1]:
!pip install pyproj
!pip install geopy



In [2]:
import pandas as pd
from sklearn.neighbors import BallTree
import numpy as np
from pyproj import Transformer

# Load the master table that every later cell augments with new feature columns.
# The file is read as CP949 (a Korean Windows code page).
main = pd.read_csv('bike_rental_master.csv', encoding='cp949')

In [3]:
# Feature: for every row of `main`, sum a linear distance weight
# (1 at 0 m, falling to 0 at 500 m) over the OTHER rows of `main` within 500 m.
main = main.drop(columns=['주소2'])

# Keep only rows whose latitude and longitude are both non-zero. The .copy()
# makes the filtered result an independent frame, so the column assignment at
# the end of this cell cannot trigger pandas' SettingWithCopyWarning.
main = main[(main['위도'] != 0) & (main['경도'] != 0)].copy()

# scikit-learn's haversine metric requires each point as
# (latitude, longitude) in radians -- in that order. Passing (lon, lat)
# silently yields wrong distances.
main_coord = np.deg2rad(main[['위도', '경도']].values)
tree = BallTree(main_coord, metric='haversine')

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# One batched query: dist[i] holds the angular distances from row i to every
# tree point inside the radius.
ind, dist = tree.query_radius(main_coord, r=radius, return_distance=True)

# Every query point is itself in the tree and matches at distance 0
# (weight 1), so 1 is subtracted to leave only the neighbours' contribution.
weight_sum = [(1 - d * 6371000 / 500).sum() - 1 for d in dist]

main['500m내_대여소가중치'] = weight_sum

In [4]:
# Feature: distance-weighted count of bus stops within 500 m of each row of
# `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).
bus = pd.read_csv('bus_stop_location.csv', encoding='cp949')

# scikit-learn's haversine metric requires (latitude, longitude) in radians,
# in that order.
# NOTE(review): assumes 'X좌표' is longitude and 'Y좌표' is latitude, both in
# degrees -- confirm against the bus-stop file.
bus_coord = np.deg2rad(bus[['Y좌표', 'X좌표']].values)
tree = BallTree(bus_coord, metric='haversine')
main_coord = np.deg2rad(main[['위도', '경도']].values)

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# One batched query: dist[i] holds the angular distances from row i of `main`
# to every bus stop inside the radius (empty array when there is none).
ind, dist = tree.query_radius(main_coord, r=radius, return_distance=True)
weight_sum = [(1 - d * 6371000 / 500).sum() for d in dist]

main['500m내_버스정류소가중치'] = weight_sum

In [5]:
# Feature: distance-weighted count of subway stations within 500 m of each row
# of `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).
subway = pd.read_csv('subway_station_location.csv', encoding='cp949')

# scikit-learn's haversine metric requires (latitude, longitude) in radians,
# in that order; (lon, lat) would silently produce wrong distances.
subway_coord = np.deg2rad(subway[['위도', '경도']].values)
tree = BallTree(subway_coord, metric='haversine')
main_coord = np.deg2rad(main[['위도', '경도']].values)

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# One batched query: dist[i] holds the angular distances from row i of `main`
# to every subway station inside the radius (empty array when there is none).
ind, dist = tree.query_radius(main_coord, r=radius, return_distance=True)
weight_sum = [(1 - d * 6371000 / 500).sum() for d in dist]

main['500m내_지하철역가중치'] = weight_sum

In [6]:
# Feature: for each row of `main`, sum the '사고건수' values of the accident
# records within 500 m, each scaled by a linear distance weight
# (1 at 0 m, falling to 0 at 500 m).
bike_accident = pd.read_csv('bike_accident_data.csv', encoding='cp949')

# scikit-learn's haversine metric requires (latitude, longitude) in radians,
# in that order; (lon, lat) would silently produce wrong distances.
accident_coord = np.deg2rad(bike_accident[['위도', '경도']].values)
tree = BallTree(accident_coord, metric='haversine')
main_coord = np.deg2rad(main[['위도', '경도']].values)

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# Positional array, so the tree's row indices can be used to look values up.
accident_values = bike_accident['사고건수'].values

# One batched query: ind[i] are row positions in `bike_accident`, and dist[i]
# the matching angular distances, for row i of `main`.
ind, dist = tree.query_radius(main_coord, r=radius, return_distance=True)
weight_sum = [
    ((1 - d * 6371000 / 500) * accident_values[i]).sum()
    for i, d in zip(ind, dist)
]

main['500m내_사고건수_가중치'] = weight_sum

In [7]:
# Feature: distance-weighted count of bike-shed records within 500 m of each
# row of `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).
parking = pd.read_csv('bike_shed_location.csv', encoding='cp949')

# Points are passed to the haversine tree as ('y 좌표', 'x 좌표') for the sheds
# and ('위도', '경도') for `main`, converted from degrees to radians.
shed_rad = np.deg2rad(parking[['y 좌표', 'x 좌표']].values)
station_rad = np.deg2rad(main[['위도', '경도']].values)
tree = BallTree(shed_rad, metric='haversine')

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# Batched radius query; shed_dist[i] are the angular distances for row i.
_, shed_dist = tree.query_radius(station_rad, r=radius, return_distance=True)
shed_weight = [(1 - (d * 6371000) / 500).sum() for d in shed_dist]

main['500m내_자전거보관소가중치'] = shed_weight

In [8]:
# Feature: distance-weighted count of crossroad records within 500 m of each
# row of `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).
crossroad = pd.read_csv('crossroad_data.csv', encoding='cp949')

# Rows without both projected coordinates cannot be transformed; drop them.
crossroad = crossroad.dropna(subset=['X좌표', 'Y좌표'])

# Reproject EPSG:5186 -> EPSG:4326. With always_xy=True the transformer takes
# and returns coordinates in (x, y) order, i.e. (longitude, latitude) here.
to_wgs84 = Transformer.from_crs("EPSG:5186", "EPSG:4326", always_xy=True)
lon_deg, lat_deg = to_wgs84.transform(
    crossroad['X좌표'].values,
    crossroad['Y좌표'].values,
)
crossroad['경도'] = lon_deg
crossroad['위도'] = lat_deg

# Haversine tree over (latitude, longitude) pairs in radians.
crossroad_rad = np.deg2rad(crossroad[['위도', '경도']].values)
station_rad = np.deg2rad(main[['위도', '경도']].values)
tree = BallTree(crossroad_rad, metric='haversine')

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# Batched radius query; cross_dist[i] are the angular distances for row i.
_, cross_dist = tree.query_radius(station_rad, r=radius, return_distance=True)
cross_weight = [(1 - (d * 6371000) / 500).sum() for d in cross_dist]

main['500m내_교차로가중치'] = cross_weight

In [9]:
# Feature: distance-weighted count of bike-lane records within 500 m of each
# row of `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).
bikelane = (
    pd.read_csv("bike_lane_data.csv", encoding='cp949')
    .dropna(subset=['위도', '경도'])  # rows lacking a coordinate are unusable
)

# Haversine tree over (latitude, longitude) pairs in radians.
lane_rad = np.deg2rad(bikelane[['위도', '경도']].values)
station_rad = np.deg2rad(main[['위도', '경도']].values)
tree = BallTree(lane_rad, metric='haversine')

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# Batched radius query; lane_dist[i] are the angular distances for row i.
_, lane_dist = tree.query_radius(station_rad, r=radius, return_distance=True)
lane_weight = [(1 - (d * 6371000) / 500).sum() for d in lane_dist]

main['500m내_자전거도로가중치'] = lane_weight

In [10]:
# Feature: binary flag -- 1 when at least one tourist-attraction record lies
# within 500 m of the row of `main`, otherwise 0.
spots = pd.read_csv('tourist_attraction_location.csv', encoding='utf-8-sig')

# Attractions missing either coordinate cannot be placed in the tree.
spots_clean = spots.dropna(subset=['위도', '경도'])

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# Haversine tree over (latitude, longitude) pairs in radians.
spot_coords = np.deg2rad(spots_clean[['위도', '경도']].values)
main_coords = np.deg2rad(main[['위도', '경도']].values)
tree = BallTree(spot_coords, metric='haversine')

# count_only=True returns just the number of neighbours per query point.
n_spots_nearby = tree.query_radius(main_coords, r=radius, count_only=True)
has_spot_nearby = n_spots_nearby > 0
main['500m내_관광지_여부'] = has_spot_nearby.astype(int)

In [11]:
# Attach the '동' column to `main` by matching on '대여소_ID'.
dong = pd.read_csv('dong_data.csv', encoding='utf-8-sig')

# Left join: every row of `main` is kept; rows with no match get NaN in '동'.
dong_lookup = dong[['대여소_ID', '동']]
main = pd.merge(main, dong_lookup, how='left', on='대여소_ID')

In [12]:
# Load the diner locations and discard rows that lack either coordinate.
diner = (
    pd.read_csv('diner_location_data.csv', encoding='utf-8-sig')
    .dropna(subset=['위도', '경도'])
)

In [13]:
# Feature: distance-weighted count of diner records within 500 m of each row
# of `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).

# scikit-learn's haversine metric requires (latitude, longitude) in radians,
# in that order; (lon, lat) would silently produce wrong distances.
diner_coord = np.deg2rad(diner[['위도', '경도']].values)
tree = BallTree(diner_coord, metric='haversine')
main_coord = np.deg2rad(main[['위도', '경도']].values)

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# One batched query: dist[i] holds the angular distances from row i of `main`
# to every diner inside the radius (empty array when there is none).
ind, dist = tree.query_radius(main_coord, r=radius, return_distance=True)

# No "- 1" self-match correction here: the tree contains diners only, so a
# query point from `main` never matches itself. Subtracting 1 would bias every
# score and produce -1 where no diner is nearby.
weight_sum = [(1 - d * 6371000 / 500).sum() for d in dist]

main['500m내_휴게음식점가중치'] = weight_sum

In [14]:
# Feature: distance-weighted count of high-school records within 500 m of each
# row of `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).
highschool = (
    pd.read_csv('high_school_data.csv', encoding='cp949')
    .dropna(subset=['위도', '경도'])  # rows lacking a coordinate are unusable
)

# Haversine tree over (latitude, longitude) pairs in radians.
school_rad = np.deg2rad(highschool[['위도', '경도']].values)
station_rad = np.deg2rad(main[['위도', '경도']].values)
tree = BallTree(school_rad, metric='haversine')

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# Batched radius query; school_dist[i] are the angular distances for row i.
_, school_dist = tree.query_radius(station_rad, r=radius, return_distance=True)
school_weight = [(1 - (d * 6371000) / 500).sum() for d in school_dist]

main['500m내_고등학교가중치'] = school_weight

In [15]:
# Feature: distance-weighted count of college records within 500 m of each row
# of `main` (weight is 1 at 0 m and falls linearly to 0 at 500 m).
college = (
    pd.read_csv('college_data.csv', encoding='cp949')
    .dropna(subset=['위도', '경도'])  # rows lacking a coordinate are unusable
)

# Haversine tree over (latitude, longitude) pairs in radians.
college_rad = np.deg2rad(college[['위도', '경도']].values)
station_rad = np.deg2rad(main[['위도', '경도']].values)
tree = BallTree(college_rad, metric='haversine')

# 500 m expressed as an angle (radians) on a sphere of radius 6,371,000 m.
radius = 500 / 6371000

# Batched radius query; college_dist[i] are the angular distances for row i.
_, college_dist = tree.query_radius(station_rad, r=radius, return_distance=True)
college_weight = [(1 - (d * 6371000) / 500).sum() for d in college_dist]

main['500m내_대학교가중치'] = college_weight

In [16]:
# Derive the '구' column from the address text, then discard the address.
# The pattern captures the first run of non-whitespace characters that ends
# in '구'.
# NOTE(review): an address token such as '대구광역시' would match as '대구' --
# confirm that no such prefix occurs in '주소1'.
district = main['주소1'].str.extract(r'(\S+구)')
main = main.drop(columns=['주소1'])
main['구'] = district

In [17]:
# Write the feature table to disk as CP949-encoded CSV; index=False omits the
# DataFrame index from the output file.
main.to_csv('data_preprocessing_file.csv', index=False, encoding='cp949')