# Load Data

In [23]:
import pandas as pd

# Source file: Seoul subway station elevator location data, stored in the CP949 encoding.
subway_csv_path = 'C:/Users/ITM28/Downloads/서울시 지하철역 엘리베이터 위치정보.csv'
subway_info = pd.read_csv(subway_csv_path, encoding='cp949')

# Print the (rows, columns) shape, then preview the first rows as the cell output.
print(subway_info.shape)
subway_info.head()

(643, 10)


Unnamed: 0,노드링크 유형,노드 WKT,노드 ID,노드 유형 코드,시군구코드,시군구명,읍면동코드,읍면동명,지하철역코드,지하철역명
0,NODE,POINT(126.98515892357797 37.57622646659184),211632,0,1111000000,종로구,1111013400,경운동,269.0,안국
1,NODE,POINT(126.97413365201709 37.575965675947124),212372,1,1111000000,종로구,1111010700,적선동,271.0,경복궁(정부서울청사)
2,NODE,POINT(126.99183426224357 37.572491218588375),211578,1,1111000000,종로구,1111015100,묘동,265.0,종로3가
3,NODE,POINT(127.0158608830301 37.57329756951232),211842,0,1111000000,종로구,1111017500,숭인동,268.0,동묘앞
4,NODE,POINT(127.01551499790813 37.57951303710329),212418,0,1111000000,종로구,1111017500,숭인동,270.0,창신


## Subway Station Information Data Preprocessing

In [24]:
# These columns are unique identifier codes for each subway station and are not
# used in the analysis, so they are removed up front.
unused_columns = ['노드 ID', '노드링크 유형', '시군구코드', '노드 유형 코드', '지하철역코드', '읍면동코드']
subway_info = subway_info.drop(columns=unused_columns)
subway_info.head()

Unnamed: 0,노드 WKT,시군구명,읍면동명,지하철역명
0,POINT(126.98515892357797 37.57622646659184),종로구,경운동,안국
1,POINT(126.97413365201709 37.575965675947124),종로구,적선동,경복궁(정부서울청사)
2,POINT(126.99183426224357 37.572491218588375),종로구,묘동,종로3가
3,POINT(127.0158608830301 37.57329756951232),종로구,숭인동,동묘앞
4,POINT(127.01551499790813 37.57951303710329),종로구,숭인동,창신


### Extract the Coordinates of Each Subway Station to Find Its District

In [25]:
import numpy as np

# Each '노드 WKT' value has the form 'POINT(<longitude> <latitude>)': strip the
# 'POINT(' prefix and the closing ')' and split what remains on the space.
coordinate_pairs = [wkt[6:-1].split(' ') for wkt in subway_info['노드 WKT']]
subway_info['Longitude'] = [float(pair[0]) for pair in coordinate_pairs]
subway_info['Latitude'] = [float(pair[1]) for pair in coordinate_pairs]

# The raw WKT text is no longer needed once the numeric columns exist.
subway_info = subway_info.drop(columns=['노드 WKT'])

# Collect the coordinates as rows of [longitude, latitude] for the district lookup.
XY = np.array(list(map(list, zip(subway_info['Longitude'], subway_info['Latitude']))))
XY

array([[126.98515892,  37.57622647],
       [126.97413365,  37.57596568],
       [126.99183426,  37.57249122],
       ...,
       [127.13820385,  37.51977513],
       [127.17628284,  37.55740515],
       [127.12448672,  37.53811503]])

### Use the Kakao API to Find the District of Each Subway Station

In [26]:
# Define a function that finds the district for a coordinate pair
# reference: https://developers.kakao.com/docs/latest/ko/local/dev-guide#coord-to-address
import os
from time import sleep
from urllib.parse import urlparse

import requests
import urllib3

def district(xy, api_key=None, verify=True):
    """Look up the district that contains a coordinate via the Kakao Local API.

    Calls the ``coord2regioncode`` endpoint and reports the last region
    document in the response, which is what the original loop ended up
    returning.

    Parameters
    ----------
    xy : sequence of two numbers
        ``xy[0]`` is sent as ``x`` (longitude) and ``xy[1]`` as ``y`` (latitude).
    api_key : str, optional
        Kakao REST API key. When omitted it is read from the
        ``KAKAO_REST_API_KEY`` environment variable, so the key is never
        stored in the notebook.
    verify : bool, optional
        Forwarded to ``requests.get``. TLS certificate verification stays on
        by default; pass ``False`` only when it is knowingly unavailable.

    Returns
    -------
    list
        ``[region_type, region_3depth_name, x, y]`` taken from the last
        document of the response, or the placeholder ``[0, 0, 0, 0]`` when the
        response has no documents or that document lacks one of the fields.

    Raises
    ------
    RuntimeError
        If no API key is supplied and the environment variable is unset.
    requests.HTTPError
        If the API answers with an error status (for example a rejected key).
    """
    if api_key is None:
        api_key = os.environ.get("KAKAO_REST_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Kakao REST API key is missing: pass api_key or set the "
            "KAKAO_REST_API_KEY environment variable."
        )

    if not verify:
        # Only silence the warning when the caller explicitly opted out of TLS checks.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    result = requests.get(
        "https://dapi.kakao.com/v2/local/geo/coord2regioncode.json",
        params={"x": xy[0], "y": xy[1]},
        headers={"Authorization": f"KakaoAK {api_key}"},
        timeout=30,
        verify=verify,
    )
    # Fail loudly on HTTP errors instead of tripping over a missing 'documents' key.
    result.raise_for_status()
    documents = result.json().get("documents") or []

    placeholder = [0, 0, 0, 0]
    if not documents:
        return placeholder

    # The original loop overwrote its result on every iteration, so the last
    # document is the one that was effectively returned.
    document = documents[-1]
    try:
        # region_type is H for an administrative dong (행정동) or B for a legal dong (법정동);
        # region_3depth_name is the name of the district.
        return [document['region_type'], document['region_3depth_name'], document['x'], document['y']]
    except KeyError:
        return placeholder

In [27]:
# Sanity-check the lookup on a single [longitude, latitude] pair before running it on every station.
sample_xy = [127.011904958, 37.5420646255]
district(sample_xy)

['H', '옥수동', 127.0134338126595, 37.54358863837599]

In [28]:
# Look up the district for every station coordinate, one API request per row of XY.
district_list = [district(station_xy) for station_xy in XY]

In [30]:
# The 'H/B' column stores the region type: H is an administrative dong (행정동),
# B is a legal dong (법정동).
district_columns = ['H/B', 'District', 'Longitude', 'Latitude']
subway_district = pd.DataFrame(data=district_list, columns=district_columns)
subway_district.head()

Unnamed: 0,H/B,District,Longitude,Latitude
0,H,종로1.2.3.4가동,126.990287,37.574436
1,H,사직동,126.968846,37.576187
2,H,종로1.2.3.4가동,126.990287,37.574436
3,H,숭인1동,127.015645,37.57783
4,H,숭인1동,127.015645,37.57783


In [31]:
# List the distinct region types that the lookup returned.
region_types = subway_district['H/B'].unique()
region_types

array(['H'], dtype=object)

In [32]:
# Remove the region-type column and preview what is left.
subway_district = subway_district.drop(columns=['H/B'])
subway_district.head()

Unnamed: 0,District,Longitude,Latitude
0,종로1.2.3.4가동,126.990287,37.574436
1,사직동,126.968846,37.576187
2,종로1.2.3.4가동,126.990287,37.574436
3,숭인1동,127.015645,37.57783
4,숭인1동,127.015645,37.57783


In [35]:
# Drop the coordinate columns from the station table; the Longitude/Latitude
# columns of subway_district are the ones kept in the combined table.
subway_info = subway_info.drop(columns=['Longitude', 'Latitude'])

# Place the station attributes and the district lookup side by side, aligned on the row index.
subway_info_district = pd.concat([subway_info, subway_district], axis='columns')
print(subway_info_district.shape)
subway_info_district.head()

(643, 6)


Unnamed: 0,시군구명,읍면동명,지하철역명,District,Longitude,Latitude
0,종로구,경운동,안국,종로1.2.3.4가동,126.990287,37.574436
1,종로구,적선동,경복궁(정부서울청사),사직동,126.968846,37.576187
2,종로구,묘동,종로3가,종로1.2.3.4가동,126.990287,37.574436
3,종로구,숭인동,동묘앞,숭인1동,127.015645,37.57783
4,종로구,숭인동,창신,숭인1동,127.015645,37.57783


In [38]:
# Save the combined table as CSV, leaving out the row index.
output_path = './subway_info_district.csv'
subway_info_district.to_csv(output_path, index=False)