In [1]:
import os
import zipfile
from io import BytesIO

import numpy as np
import pandas as pd
import requests

In [2]:
# Station coordinate table. `skiprows=1` drops the file's first line
# (presumably its own header row -- confirm against the CSV) and `names`
# supplies the column labels used by the rest of the notebook.
subway_ = pd.read_csv(
    'station_coordinate.csv',
    encoding='CP949',
    skiprows=1,
    names=['line', 'name', 'code', 'latitude', 'longitude'],
)
subway_.head()

Unnamed: 0,line,name,code,latitude,longitude
0,01호선,녹양,1908.0,37.75938,127.042292
1,01호선,남영,1002.0,37.541021,126.9713
2,01호선,용산,1003.0,37.529849,126.964561
3,01호선,노량진,1004.0,37.514219,126.942454
4,01호선,대방,1005.0,37.513342,126.926382


In [3]:
def juso(x, timeout=10):
    """Add Kakao reverse-geocoded region names to one station row.

    Calls the Kakao Local ``coord2regioncode`` endpoint with the row's
    coordinates and stores the result in three new fields of ``x``.

    Parameters
    ----------
    x : mapping-like row (e.g. a ``pandas.Series`` from ``DataFrame.apply``)
        Must provide ``'longitude'`` and ``'latitude'``. It is modified in
        place and also returned.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up, so a stalled
        connection cannot hang a whole ``apply`` run.

    Returns
    -------
    The same ``x`` with ``'gu_name'``, ``'dong_name'`` and ``'full_name'``
    set. All three are empty strings when the request fails or the response
    does not have the expected structure.

    Notes
    -----
    The REST API key is read from the ``KAKAO_REST_API_KEY`` environment
    variable; if it is unset, the literal placeholder ``'apikey'`` is sent.
    """
    url = "https://dapi.kakao.com/v2/local/geo/coord2regioncode.json"
    params = {'x': x['longitude'], 'y': x['latitude']}
    api_key = os.environ.get('KAKAO_REST_API_KEY', 'apikey')
    header = {
        'authorization': 'KakaoAK ' + api_key,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
    }

    try:
        response = requests.request(
            method="GET", url=url, headers=header, params=params, timeout=timeout
        )
        # The second entry of `documents` is used; presumably that is the
        # administrative-dong record -- TODO confirm against the Kakao docs.
        region = response.json()['documents'][1]
        gu_name = region['region_2depth_name']
        dong_name = region['region_3depth_name']
        full_name = region['region_1depth_name'] + ' ' + gu_name + ' ' + dong_name
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Best-effort lookup: network failures, non-JSON bodies and responses
        # without the expected fields all fall back to blanks. Anything else
        # (KeyboardInterrupt, programming errors) is allowed to propagate.
        gu_name = dong_name = full_name = ''

    x['gu_name'] = gu_name
    x['dong_name'] = dong_name
    x['full_name'] = full_name
    return x

In [4]:
# Build `subway_all`: one row per (source CSV, station name) holding that
# CSV's summed '하차총승객수' for the station. Every CSV member of every zip
# archive in `path` contributes.
path = "CARD_SUBWAY_MONTH"
# Sorted so the processing order does not depend on the filesystem.
file_lst = sorted(os.listdir(path))

# Collected here and concatenated once after the loop; concatenating inside
# the loop re-copies the accumulated frame on every iteration.
per_file_totals = []

for file in file_lst:
    archive_path = os.path.join(path, file)
    # Skip directory entries that are not zip archives (hidden files,
    # checkpoint folders, ...) instead of failing on them.
    if not zipfile.is_zipfile(archive_path):
        continue
    # The context manager closes the archive handle when the block exits.
    with zipfile.ZipFile(archive_path, 'r') as archive:
        for member_name in archive.namelist():
            member = pd.read_csv(BytesIO(archive.read(member_name)), index_col=False)
            totals = (
                member[['역명', '하차총승객수']]
                .groupby(by=['역명'])
                .sum()
                .reset_index()
            )
            per_file_totals.append(totals)

# Fall back to an empty frame when nothing was read, as before.
subway_all = pd.concat(per_file_totals) if per_file_totals else pd.DataFrame()

In [5]:
# Collapse the per-file totals to one row per station name by averaging
# '하차총승객수' over all rows that share the same '역명'.
subway_all = (
    subway_all
    .groupby('역명')
    .mean()
    .reset_index()
)
subway_all.head()

Unnamed: 0,역명,하차총승객수
0,4.19민주묘지,73716.53
1,가능,165300.0
2,가락시장,440993.7
3,가산디지털단지,1544755.0
4,가양,502739.7


In [6]:
# Row counts: coordinate table vs. averaged ridership table.
print(subway_.shape[0], subway_all.shape[0])

730 534


In [7]:
# Left-join the averaged ridership onto the coordinate table by station name,
# then drop the join key and the station code, which are not used afterwards.
subway = (
    subway_
    .merge(subway_all, how='left', left_on='name', right_on='역명')
    .drop(columns=['code', '역명'])
)
# Stations with no matching name in the ridership data get 0 instead of NaN.
subway['하차총승객수'] = subway['하차총승객수'].fillna(0)
subway

Unnamed: 0,line,name,latitude,longitude,하차총승객수
0,01호선,녹양,37.759380,127.042292,113944.470588
1,01호선,남영,37.541021,126.971300,247855.058824
2,01호선,용산,37.529849,126.964561,934132.529412
3,01호선,노량진,37.514219,126.942454,897086.176471
4,01호선,대방,37.513342,126.926382,334581.617647
...,...,...,...,...,...
725,인천선,지식정보단지,37.378384,126.645168,0.000000
726,인천선,인천대입구,37.386007,126.639484,0.000000
727,인천선,센트럴파크,37.393054,126.634729,0.000000
728,인천선,국제업무지구,37.399907,126.630347,0.000000


In [10]:
# Look up region names row by row (one `juso` call per station), then drop
# the coordinates, which are only needed for that lookup.
subway = (
    subway
    .apply(juso, axis=1)
    .drop(columns=['latitude', 'longitude'])
)
subway.head()

Unnamed: 0,line,name,하차총승객수,gu_name,dong_name,full_name
0,01호선,녹양,113944.470588,의정부시,녹양동,경기도 의정부시 녹양동
1,01호선,남영,247855.058824,용산구,남영동,서울특별시 용산구 남영동
2,01호선,용산,934132.529412,용산구,한강로동,서울특별시 용산구 한강로동
3,01호선,노량진,897086.176471,동작구,노량진1동,서울특별시 동작구 노량진1동
4,01호선,대방,334581.617647,영등포구,신길7동,서울특별시 영등포구 신길7동


In [11]:
# Keep only the rows whose full_name contains '서울특별시'.
subway = subway.loc[subway['full_name'].str.contains('서울특별시')]

In [12]:
# Display the filtered frame (rows whose full_name contains '서울특별시').
subway

Unnamed: 0,line,name,하차총승객수,gu_name,dong_name,full_name
1,01호선,남영,2.478551e+05,용산구,남영동,서울특별시 용산구 남영동
2,01호선,용산,9.341325e+05,용산구,한강로동,서울특별시 용산구 한강로동
3,01호선,노량진,8.970862e+05,동작구,노량진1동,서울특별시 동작구 노량진1동
4,01호선,대방,3.345816e+05,영등포구,신길7동,서울특별시 영등포구 신길7동
5,01호선,영등포,1.080642e+06,영등포구,영등포본동,서울특별시 영등포구 영등포본동
...,...,...,...,...,...,...
654,우이신설경전철,성신여대입구,0.000000e+00,성북구,삼선동,서울특별시 성북구 삼선동
655,우이신설경전철,보문,2.108596e+05,성북구,보문동,서울특별시 성북구 보문동
656,우이신설경전철,신설동,4.897334e+05,동대문구,용신동,서울특별시 동대문구 용신동
657,우이신설경전철,화계,8.111994e+04,강북구,인수동,서울특별시 강북구 인수동


In [13]:
# (rows, columns) of the filtered station table.
subway.shape

(390, 6)

In [14]:
# Write the filtered station table to disk as a CP949-encoded CSV, without
# the DataFrame index column.
subway.to_csv(
    'subwaystation.csv',
    index=False,
    encoding='cp949',
)