# Load Data

In [67]:
import pandas as pd

# Read the Seoul bus-stop location table; the source file is cp949-encoded.
bus_info = pd.read_csv(
    filepath_or_buffer='./서울시 버스정류소 위치정보.csv',
    encoding='cp949',
)
print(bus_info.shape)  # (number of rows, number of columns)
bus_info.head(n=5)

(11290, 6)


Unnamed: 0,노드 ID,정류소번호,정류소명,X좌표,Y좌표,정류소 타입
0,100000001,1001,종로2가사거리,126.987752,37.569808,중앙차로
1,100000002,1002,창경궁.서울대학교병원,126.996522,37.579433,중앙차로
2,100000003,1003,명륜3가.성대입구,126.998251,37.582581,중앙차로
3,100000004,1004,종로2가.삼일교,126.987613,37.568579,중앙차로
4,100000005,1005,혜화동로터리.여운형활동터,127.001744,37.586243,중앙차로


## Bus Station Information Data Preprocessing

In [68]:
# The node ID, stop number and stop type columns are not used in this analysis,
# so only the stop name and its X/Y coordinates are kept.
bus_info = bus_info.drop(columns=['노드 ID', '정류소번호', '정류소 타입'])
bus_info.head()

Unnamed: 0,정류소명,X좌표,Y좌표
0,종로2가사거리,126.987752,37.569808
1,창경궁.서울대학교병원,126.996522,37.579433
2,명륜3가.성대입구,126.998251,37.582581
3,종로2가.삼일교,126.987613,37.568579
4,혜화동로터리.여운형활동터,127.001744,37.586243


### Map Each Bus Station's Coordinates to Find Out Its District

In [8]:
import numpy as np

# Pair every stop's X and Y coordinate into one [x, y] row of a 2-D array.
XY = np.array(
    [[x, y] for x, y in zip(bus_info['X좌표'], bus_info['Y좌표'])]
)
XY

array([[126.987752  ,  37.569808  ],
       [126.996522  ,  37.579433  ],
       [126.998251  ,  37.582581  ],
       ...,
       [127.12359618,  37.53363026],
       [127.12549684,  37.53615493],
       [127.12733709,  37.54034306]])

### Use the Kakao API to Find Out the District of Each Bus Station

In [51]:
# Define a function that finds the district of a coordinate pair
# reference: https://developers.kakao.com/docs/latest/ko/local/dev-guide#coord-to-address
import requests
from urllib.parse import urlparse
from time import sleep

def district(xy):
    """Look up the district (dong) for a coordinate pair via the Kakao Local API.

    Calls the ``coord2regioncode`` endpoint and extracts the region fields from
    the response.

    Parameters
    ----------
    xy : sequence of two numbers
        ``xy[0]`` is sent as the API's ``x`` query parameter and ``xy[1]`` as
        its ``y`` query parameter.

    Returns
    -------
    list
        ``[region_type, region_3depth_name, x, y]`` taken from the LAST entry
        of the response's ``documents`` array (later entries overwrite earlier
        ones). ``[0, 0, 0, 0]`` is returned when the response contains no
        documents or when the last document lacks one of those fields.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (for example an invalid key).
    """
    url = f"https://dapi.kakao.com/v2/local/geo/coord2regioncode.json?x={xy[0]}&y={xy[1]}"
    # NOTE(review): verify=False turns off TLS certificate checking, so the API
    # key is sent over an unauthenticated connection. Kept to preserve existing
    # behaviour; enable verification if the environment allows it.
    result = requests.get(url, headers={"Authorization":"KakaoAK ---여기에 키를 입력하시오---"}
                         , timeout=30, verify=False)
    # Fail loudly on an error status instead of hitting a KeyError on 'documents' below.
    result.raise_for_status()
    result_json = result.json()

    # Start from the sentinel so an empty 'documents' list still yields a value
    # (previously this path raised UnboundLocalError).
    district_info = [0, 0, 0, 0]
    for document in result_json.get('documents', []):
        try:
            # region_type is 'H' (administrative dong) or 'B' (legal dong);
            # region_3depth_name is the name of the district.
            district_info = [document['region_type'], document['region_3depth_name'], document['x'], document['y']]
        except (KeyError, TypeError):
            # A malformed document resets the result to the sentinel.
            district_info = [0, 0, 0, 0]

    return district_info

In [52]:
# Try the lookup on a single coordinate pair before running it on every stop.
district([127.011904958, 37.5420646255])



['H', '옥수동', 127.0134338126595, 37.54358863837599]

In [53]:
# Look up the district for every coordinate pair in XY (one API request per row).
district_list = [district(coord) for coord in XY]





































































































































































































































































































































































































































































































































































































































































































































































































































In [69]:
# Region type codes: H = administrative dong (행정동), B = legal dong (법정동).
bus_district = pd.DataFrame(
    data=district_list,
    columns=['H/B', 'District', 'Longitude', 'Latitude'],
)
bus_district.head()

Unnamed: 0,H/B,District,Longitude,Latitude
0,H,종로1.2.3.4가동,126.990287,37.574436
1,H,종로1.2.3.4가동,126.990287,37.574436
2,H,혜화동,127.00062,37.586855
3,H,종로1.2.3.4가동,126.990287,37.574436
4,H,혜화동,127.00062,37.586855


In [70]:
# List the distinct region-type codes present in the lookup results.
bus_district.loc[:, 'H/B'].unique()

array(['H'], dtype=object)

In [71]:
# Remove the region-type column; only the district name and coordinates are kept.
bus_district = bus_district.drop(columns='H/B')
bus_district.head()

Unnamed: 0,District,Longitude,Latitude
0,종로1.2.3.4가동,126.990287,37.574436
1,종로1.2.3.4가동,126.990287,37.574436
2,혜화동,127.00062,37.586855
3,종로1.2.3.4가동,126.990287,37.574436
4,혜화동,127.00062,37.586855


In [72]:
# Swap the raw stop coordinates for the district name and district coordinates
# from the lookup; the two frames are joined side by side on their row index.
bus_info = bus_info.drop(columns=['X좌표', 'Y좌표'])

bus_info_district = pd.concat([bus_info, bus_district], axis='columns')
print(bus_info_district.shape)
bus_info_district.head()

(11290, 4)


Unnamed: 0,정류소명,District,Longitude,Latitude
0,종로2가사거리,종로1.2.3.4가동,126.990287,37.574436
1,창경궁.서울대학교병원,종로1.2.3.4가동,126.990287,37.574436
2,명륜3가.성대입구,혜화동,127.00062,37.586855
3,종로2가.삼일교,종로1.2.3.4가동,126.990287,37.574436
4,혜화동로터리.여운형활동터,혜화동,127.00062,37.586855


In [None]:
# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing the result.
import os

os.makedirs('./data', exist_ok=True)
bus_info_district.to_csv('./data/bus_info_district.csv', index=False)