In [1]:
import pandas as pd
import numpy as np
import zipfile
import os
import requests

In [2]:
def juso(x) :
    method = "GET"
    url = "https://dapi.kakao.com/v2/local/geo/coord2regioncode.json"
    params = {'x' : x['longitude'], 'y' : x['latitude']}
    header = {'authorization': 'KakaoAK apikey', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'}
    
    try :
        response = requests.request(method=method, url=url, headers=header, params=params )
        tokens = response.json()
        x['gu_name'] = tokens['documents'][1]['region_2depth_name']
        x['dong_name'] = tokens['documents'][1]['region_3depth_name']
        x['full_name'] = tokens['documents'][1]['region_1depth_name']+' '+tokens['documents'][1]['region_2depth_name']+' '+tokens['documents'][1]['region_3depth_name']
    except :
        x['gu_name'] = ''
        x['dong_name'] = ''
        x['full_name'] = ''
    return x

In [3]:
bus_stop = pd.read_csv('서울시버스정류소좌표데이터(2022.08.24).csv', encoding='CP949', names = ['node_id', 'ars_id', 'busstop_name', 'longitude', 'latitude' ], skiprows=1)
bus_stop.head()

Unnamed: 0,node_id,ars_id,busstop_name,longitude,latitude
0,100000001,1001,종로2가사거리,126.987786,37.569764
1,100000002,1002,창경궁.서울대학교병원,126.99652,37.579179
2,100000003,1003,명륜3가.성대입구,126.99829,37.582709
3,100000004,1004,종로2가.삼일교,126.987507,37.568582
4,100000005,1005,혜화동로터리.여운형활동터,127.001694,37.58623


In [4]:
path = "BUS_STATION_BOARDING_MONTH"
file_lst = os.listdir(path)
from io import BytesIO

bus_all = pd.DataFrame()

for file in file_lst:
    zip_ = zipfile.ZipFile(path+'/'+file, 'r')
    listOfFileNames = zip_.namelist()
    for fileName in listOfFileNames:
        zipRead = zip_.read(fileName)
        bus = pd.read_csv(BytesIO(zipRead), encoding='CP949')
        bus = bus[['버스정류장ARS번호', '하차총승객수']]
        bus = bus.groupby(by=['버스정류장ARS번호']).sum()
        bus = bus.reset_index()
        bus_all = pd.concat([bus_all,bus])

In [5]:
bus_all = bus_all.groupby(by='버스정류장ARS번호').mean().reset_index()
bus_all = bus_all[bus_all['버스정류장ARS번호'] != '~']
bus_all = bus_all.astype({'버스정류장ARS번호':'int'})
bus_all.head()

Unnamed: 0,버스정류장ARS번호,하차총승객수
0,0,1120.0
1,1001,11676.354839
2,1002,54470.580645
3,1003,69041.290323
4,1004,18642.741935


In [6]:
bus = pd.merge(bus_stop, bus_all, how='left', left_on='ars_id', right_on ='버스정류장ARS번호')
bus.drop('버스정류장ARS번호', axis=1, inplace=True)
bus['하차총승객수'] =bus['하차총승객수'].fillna(0)

In [7]:
bus

Unnamed: 0,node_id,ars_id,busstop_name,longitude,latitude,하차총승객수
0,100000001,1001,종로2가사거리,126.987786,37.569764,11676.354839
1,100000001,1001,종로2가사거리,126.987786,37.569764,12566.000000
2,100000002,1002,창경궁.서울대학교병원,126.996520,37.579179,54470.580645
3,100000002,1002,창경궁.서울대학교병원,126.996520,37.579179,48688.666667
4,100000003,1003,명륜3가.성대입구,126.998290,37.582709,69041.290323
...,...,...,...,...,...,...
14373,124000334,25995,우성아파트,127.139326,37.550442,0.000000
14374,124000333,25996,우성아파트,127.140052,37.550620,0.000000
14375,124000332,25997,조일약국,127.123477,37.533686,0.000000
14376,124000331,25998,성내시장,127.125410,37.536199,0.000000


In [8]:
bus = bus.apply(juso, axis =1).drop(['node_id','ars_id', 'latitude', 'longitude'], axis=1)
# bus = bus.apply(juso, axis =1)
bus.head()

Unnamed: 0,busstop_name,하차총승객수,gu_name,dong_name,full_name
0,종로2가사거리,11676.354839,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
1,종로2가사거리,12566.0,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
2,창경궁.서울대학교병원,54470.580645,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
3,창경궁.서울대학교병원,48688.666667,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
4,명륜3가.성대입구,69041.290323,종로구,혜화동,서울특별시 종로구 혜화동


In [9]:
bus = bus[bus['full_name'].str.contains('서울특별시')]

In [10]:
bus

Unnamed: 0,busstop_name,하차총승객수,gu_name,dong_name,full_name
0,종로2가사거리,11676.354839,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
1,종로2가사거리,12566.000000,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
2,창경궁.서울대학교병원,54470.580645,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
3,창경궁.서울대학교병원,48688.666667,종로구,종로1.2.3.4가동,서울특별시 종로구 종로1.2.3.4가동
4,명륜3가.성대입구,69041.290323,종로구,혜화동,서울특별시 종로구 혜화동
...,...,...,...,...,...
14373,우성아파트,0.000000,강동구,천호1동,서울특별시 강동구 천호1동
14374,우성아파트,0.000000,강동구,암사1동,서울특별시 강동구 암사1동
14375,조일약국,0.000000,강동구,성내2동,서울특별시 강동구 성내2동
14376,성내시장,0.000000,강동구,성내2동,서울특별시 강동구 성내2동


In [11]:
bus.shape

(14364, 5)

In [12]:
bus.to_csv('busstop.csv',encoding='cp949',index=False)