# 🗺️ station
DB에 저장할 형식의 버스와 지하철역 데이터 만들기

### TODO
- [X] 버스 route 데이터를 막차 시간과 join
- [X] route에 포함된 버스역만 추려내기
- [X] 버스 막차 시간표 추려내기
- [X] 모든 역데이터에 geohash 적용
- [ ] 각 역별 막차시간 생성

### 0. import

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from IPython.display import set_matplotlib_formats
import koreanize_matplotlib
import folium
from folium.plugins import MarkerCluster
import json

### 1. 버스 막차데이터 결측값 제거

In [2]:
bus_station = pd.read_csv('../bus/result_bus_station.csv')
train_station = pd.read_csv("../train/result_train_station_withcode.csv")
bus_route_per_station = pd.read_csv("bus_route_per_station.csv")
lastbus = pd.read_csv('../bus/bus_route_last_time.csv')

In [3]:
lastbus

Unnamed: 0.1,Unnamed: 0,station,arsId,stationNm,busRouteAbrv,busRouteId,busRouteNm,lastTm,dis,speed,sec
0,0,101000308,02920,동대문디자인프라자,TOUR11,100000017,TOUR11,17:10,0.0,0,0
1,0,101000309,02921,방산.중부시장,TOUR11,100000017,TOUR11,17:10,1697.0,109,57
2,0,101000310,02922,을지로3가,TOUR11,100000017,TOUR11,17:12,634.0,20,115
3,0,101000311,02923,을지로입구,TOUR11,100000017,TOUR11,17:15,1003.0,22,165
4,0,100000400,01918,청와대,TOUR11,100000017,TOUR11,17:19,2655.0,37,259
...,...,...,...,...,...,...,...,...,...,...,...
28253,0,232000569,35519,대우아파트,김포16A,241461015,김포16A,22:21,118.0,19,23
28254,0,232000295,35585,대우아파트후문,김포16A,241461015,김포16A,22:22,124.0,16,28
28255,0,232000291,35576,월드메르디앙아파트,김포16A,241461015,김포16A,22:23,519.0,20,94
28256,0,232000856,35803,청도아파트.은행정입구,김포16A,241461015,김포16A,22:24,203.0,13,57


In [4]:
lastbus = lastbus.drop(columns=["Unnamed: 0", "arsId", "dis", "speed", "sec"])
lastbus = lastbus.rename(
    columns={"station": "정류장번호", "stationNm": "정류소명", "busRouteAbrv": "노선명", "busRouteId": "노선번호", "lastTm": "막차시간"})

lastbus

Unnamed: 0,정류장번호,정류소명,노선명,노선번호,busRouteNm,막차시간
0,101000308,동대문디자인프라자,TOUR11,100000017,TOUR11,17:10
1,101000309,방산.중부시장,TOUR11,100000017,TOUR11,17:10
2,101000310,을지로3가,TOUR11,100000017,TOUR11,17:12
3,101000311,을지로입구,TOUR11,100000017,TOUR11,17:15
4,100000400,청와대,TOUR11,100000017,TOUR11,17:19
...,...,...,...,...,...,...
28253,232000569,대우아파트,김포16A,241461015,김포16A,22:21
28254,232000295,대우아파트후문,김포16A,241461015,김포16A,22:22
28255,232000291,월드메르디앙아파트,김포16A,241461015,김포16A,22:23
28256,232000856,청도아파트.은행정입구,김포16A,241461015,김포16A,22:24


In [6]:
lastbus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28258 entries, 0 to 28257
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   정류장번호       28258 non-null  int64 
 1   정류소명        28258 non-null  object
 2   노선명         28258 non-null  object
 3   노선번호        28258 non-null  int64 
 4   busRouteNm  28258 non-null  object
 5   막차시간        28258 non-null  object
dtypes: int64(2), object(4)
memory usage: 1.3+ MB


In [8]:
lastbus[lastbus["막차시간"].str.len() != 5]

Unnamed: 0,정류장번호,정류소명,노선명,노선번호,busRouteNm,막차시간


In [9]:
lastbus["막차시간"] = lastbus["막차시간"].str.slice_replace(start=2, stop=3, repl='')

lastbus

Unnamed: 0,정류장번호,정류소명,노선명,노선번호,busRouteNm,막차시간
0,101000308,동대문디자인프라자,TOUR11,100000017,TOUR11,1710
1,101000309,방산.중부시장,TOUR11,100000017,TOUR11,1710
2,101000310,을지로3가,TOUR11,100000017,TOUR11,1712
3,101000311,을지로입구,TOUR11,100000017,TOUR11,1715
4,100000400,청와대,TOUR11,100000017,TOUR11,1719
...,...,...,...,...,...,...
28253,232000569,대우아파트,김포16A,241461015,김포16A,2221
28254,232000295,대우아파트후문,김포16A,241461015,김포16A,2222
28255,232000291,월드메르디앙아파트,김포16A,241461015,김포16A,2223
28256,232000856,청도아파트.은행정입구,김포16A,241461015,김포16A,2224


In [10]:
lastbus.astype({"막차시간": int})

Unnamed: 0,정류장번호,정류소명,노선명,노선번호,busRouteNm,막차시간
0,101000308,동대문디자인프라자,TOUR11,100000017,TOUR11,1710
1,101000309,방산.중부시장,TOUR11,100000017,TOUR11,1710
2,101000310,을지로3가,TOUR11,100000017,TOUR11,1712
3,101000311,을지로입구,TOUR11,100000017,TOUR11,1715
4,100000400,청와대,TOUR11,100000017,TOUR11,1719
...,...,...,...,...,...,...
28253,232000569,대우아파트,김포16A,241461015,김포16A,2221
28254,232000295,대우아파트후문,김포16A,241461015,김포16A,2222
28255,232000291,월드메르디앙아파트,김포16A,241461015,김포16A,2223
28256,232000856,청도아파트.은행정입구,김포16A,241461015,김포16A,2224


In [11]:
bus_route_per_station = bus_route_per_station.rename(
    columns={"ROUTE_ID": "노선번호", "NODE_ID": "정류장번호", "X좌표": "경도", "Y좌표": "위도"})

bus_route_per_station

Unnamed: 0,노선번호,노선명,순번,정류장번호,ARS_ID,정류소명,경도,위도
0,100100585,N37,32,100000001,1001,종로2가사거리,126.987750,37.569765
1,123000010,741,77,100000001,1001,종로2가사거리,126.987750,37.569765
2,100100073,470,66,100000001,1001,종로2가사거리,126.987750,37.569765
3,100100036,171,11,100000002,1002,창경궁.서울대학교병원,126.996566,37.579183
4,100100034,162,14,100000002,1002,창경궁.서울대학교병원,126.996566,37.579183
...,...,...,...,...,...,...,...,...
42060,100100415,6004,1,161000612,92702,인천공항T2-B1층,126.434018,37.466505
42061,100100417,6008,36,161000612,92702,인천공항T2-B1층,126.434018,37.466505
42062,100100418,6011,32,161000612,92702,인천공항T2-B1층,126.434018,37.466505
42063,100100419,6012,31,161000612,92702,인천공항T2-B1층,126.434018,37.466505


In [12]:
result_bus_time = pd.merge(lastbus, bus_route_per_station, on=['노선번호', '정류장번호'], how='left')

result_bus_time

Unnamed: 0,정류장번호,정류소명_x,노선명_x,노선번호,busRouteNm,막차시간,노선명_y,순번,ARS_ID,정류소명_y,경도,위도
0,101000308,동대문디자인프라자,TOUR11,100000017,TOUR11,1710,,,,,,
1,101000309,방산.중부시장,TOUR11,100000017,TOUR11,1710,,,,,,
2,101000310,을지로3가,TOUR11,100000017,TOUR11,1712,,,,,,
3,101000311,을지로입구,TOUR11,100000017,TOUR11,1715,,,,,,
4,100000400,청와대,TOUR11,100000017,TOUR11,1719,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
28608,232000569,대우아파트,김포16A,241461015,김포16A,2221,,,,,,
28609,232000295,대우아파트후문,김포16A,241461015,김포16A,2222,,,,,,
28610,232000291,월드메르디앙아파트,김포16A,241461015,김포16A,2223,,,,,,
28611,232000856,청도아파트.은행정입구,김포16A,241461015,김포16A,2224,,,,,,


In [13]:
result_bus_time[result_bus_time["경도"].isnull()]

Unnamed: 0,정류장번호,정류소명_x,노선명_x,노선번호,busRouteNm,막차시간,노선명_y,순번,ARS_ID,정류소명_y,경도,위도
0,101000308,동대문디자인프라자,TOUR11,100000017,TOUR11,1710,,,,,,
1,101000309,방산.중부시장,TOUR11,100000017,TOUR11,1710,,,,,,
2,101000310,을지로3가,TOUR11,100000017,TOUR11,1712,,,,,,
3,101000311,을지로입구,TOUR11,100000017,TOUR11,1715,,,,,,
4,100000400,청와대,TOUR11,100000017,TOUR11,1719,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
28608,232000569,대우아파트,김포16A,241461015,김포16A,2221,,,,,,
28609,232000295,대우아파트후문,김포16A,241461015,김포16A,2222,,,,,,
28610,232000291,월드메르디앙아파트,김포16A,241461015,김포16A,2223,,,,,,
28611,232000856,청도아파트.은행정입구,김포16A,241461015,김포16A,2224,,,,,,


In [59]:
# set(result_bus_time["노선명_x"][result_bus_time["경도"].isnull()])

In [60]:
# set(result_bus_time["노선명_x"][result_bus_time["경도"].notnull()])

In [16]:
result_bus_time = result_bus_time[result_bus_time["경도"].notnull()]

result_bus_time

Unnamed: 0,정류장번호,정류소명_x,노선명_x,노선번호,busRouteNm,막차시간,노선명_y,순번,ARS_ID,정류소명_y,경도,위도
28,100000419,경복궁역(효자로입구),청와A01,100000020,청와대A01(자율주행),1645,청와대A01(자율주행),1.0,1280.0,경복궁역(효자로입구),126.974399,37.576579
29,100000415,국립고궁박물관,청와A01,100000020,청와대A01(자율주행),1648,청와대A01(자율주행),2.0,1119.0,국립고궁박물관,126.974046,37.578797
30,100000416,청와대,청와A01,100000020,청와대A01(자율주행),1649,청와대A01(자율주행),3.0,1601.0,청와대,126.973778,37.582747
31,100000417,춘추문,청와A01,100000020,청와대A01(자율주행),1650,청와대A01(자율주행),4.0,1602.0,춘추문,126.979731,37.582911
32,100000418,경복궁.국립민속박물관,청와A01,100000020,청와대A01(자율주행),1650,청와대A01(자율주행),5.0,1603.0,경복궁.국립민속박물관,126.979500,37.579532
...,...,...,...,...,...,...,...,...,...,...,...,...
10974,124000457,강동리버스트7.6단지,강동02,124900003,강동02,0051,강동02,73.0,25375.0,강동리버스트7.6단지,127.172374,37.570459
10975,124000458,강동리버스트상가,강동02,124900003,강동02,0052,강동02,74.0,25377.0,강동리버스트상가,127.173789,37.571724
10976,124900130,강동리버스트8단지,강동02,124900003,강동02,0053,강동02,75.0,25763.0,강동리버스트8단지,127.176109,37.572596
10977,124900128,강빛초.중학교,강동02,124900003,강동02,0056,강동02,76.0,25764.0,강빛초.중학교,127.172385,37.574029


In [17]:
result_bus_time = result_bus_time.sort_values(by=["노선번호", "순번"])
result_bus_time = result_bus_time.drop(
    columns=["ARS_ID", "busRouteNm", "정류소명_x", "노선명_x"])
result_bus_time = result_bus_time.rename(columns={"정류소명_y": "정류소명", "노선명_y": "노선명"})
result_bus_time = result_bus_time.reset_index(drop=True)

result_bus_time

Unnamed: 0,정류장번호,노선번호,막차시간,노선명,순번,정류소명,경도,위도
0,100000419,100000020,1645,청와대A01(자율주행),1.0,경복궁역(효자로입구),126.974399,37.576579
1,100000415,100000020,1648,청와대A01(자율주행),2.0,국립고궁박물관,126.974046,37.578797
2,100000416,100000020,1649,청와대A01(자율주행),3.0,청와대,126.973778,37.582747
3,100000417,100000020,1650,청와대A01(자율주행),4.0,춘추문,126.979731,37.582911
4,100000418,100000020,1650,청와대A01(자율주행),5.0,경복궁.국립민속박물관,126.979500,37.579532
...,...,...,...,...,...,...,...,...
10484,124000457,124900003,0051,강동02,73.0,강동리버스트7.6단지,127.172374,37.570459
10485,124000458,124900003,0052,강동02,74.0,강동리버스트상가,127.173789,37.571724
10486,124900130,124900003,0053,강동02,75.0,강동리버스트8단지,127.176109,37.572596
10487,124900128,124900003,0056,강동02,76.0,강빛초.중학교,127.172385,37.574029


In [18]:
result_bus_time.to_csv('result_bus_per_station.csv',
                      index=False, encoding="utf-8-sig");

### 2. route에 포함된 버스역만 추려내기

In [19]:
bus_station

Unnamed: 0.1,Unnamed: 0,정류장번호,정류장명,위도,경도,도시코드,도시명,관리도시명
0,0,GGB100000001,종로2가사거리(중),37.569783,126.987733,11,서울특별시,경기도
1,1,GGB100000002,창경궁.서울대학교병원(중),37.579233,126.996567,11,서울특별시,경기도
2,2,GGB100000003,명륜3가.성대입구(중),37.582700,126.998333,11,서울특별시,경기도
3,3,GGB100000004,종로2가.삼일교(중),37.568683,126.987533,11,서울특별시,경기도
4,4,GGB100000005,혜화동로터리.여운형활동터(중),37.586233,127.001750,11,서울특별시,경기도
...,...,...,...,...,...,...,...,...
62526,62526,SEB274121334,청계산(경유),37.440191,127.060532,11,서울특별시,서울
62527,62527,SEB274199480,판교IC(경유),37.399988,127.100572,11,서울특별시,서울
62528,62528,SEB274199481,판교IC(경유),37.401549,127.098467,11,서울특별시,서울
62529,62529,SEB277103813,오도삼거리(경유),37.744490,126.728584,11,서울특별시,서울


In [24]:
bus_station = bus_station.drop(columns=["Unnamed: 0", "도시코드", "도시명", "관리도시명"])
bus_station["정류장번호"] = bus_station["정류장번호"].map(lambda x: x[3:])

bus_station

Unnamed: 0,정류장번호,정류장명,위도,경도
0,100000001,종로2가사거리(중),37.569783,126.987733
1,100000002,창경궁.서울대학교병원(중),37.579233,126.996567
2,100000003,명륜3가.성대입구(중),37.582700,126.998333
3,100000004,종로2가.삼일교(중),37.568683,126.987533
4,100000005,혜화동로터리.여운형활동터(중),37.586233,127.001750
...,...,...,...,...
62526,274121334,청계산(경유),37.440191,127.060532
62527,274199480,판교IC(경유),37.399988,127.100572
62528,274199481,판교IC(경유),37.401549,127.098467
62529,277103813,오도삼거리(경유),37.744490,126.728584


In [49]:
bus_station_2 = bus_route_per_station.copy()
bus_station_2

Unnamed: 0,노선번호,노선명,순번,정류장번호,ARS_ID,정류소명,경도,위도
0,100100585,N37,32,100000001,1001,종로2가사거리,126.987750,37.569765
1,123000010,741,77,100000001,1001,종로2가사거리,126.987750,37.569765
2,100100073,470,66,100000001,1001,종로2가사거리,126.987750,37.569765
3,100100036,171,11,100000002,1002,창경궁.서울대학교병원,126.996566,37.579183
4,100100034,162,14,100000002,1002,창경궁.서울대학교병원,126.996566,37.579183
...,...,...,...,...,...,...,...,...
42060,100100415,6004,1,161000612,92702,인천공항T2-B1층,126.434018,37.466505
42061,100100417,6008,36,161000612,92702,인천공항T2-B1층,126.434018,37.466505
42062,100100418,6011,32,161000612,92702,인천공항T2-B1층,126.434018,37.466505
42063,100100419,6012,31,161000612,92702,인천공항T2-B1층,126.434018,37.466505


In [50]:
bus_station_2 = bus_station_2.drop(columns=["노선명", "순번", "노선번호", "ARS_ID"])
bus_station_2 = bus_station_2.rename(columns={"정류소명": "정류장명"})
bus_station_2 = bus_station_2.drop_duplicates()
bus_station_2 = bus_station_2.reset_index(drop=True)
bus_station_2

Unnamed: 0,정류장번호,정류장명,경도,위도
0,100000001,종로2가사거리,126.987750,37.569765
1,100000002,창경궁.서울대학교병원,126.996566,37.579183
2,100000003,명륜3가.성대입구,126.998340,37.582671
3,100000004,종로2가.삼일교,126.987613,37.568579
4,100000005,혜화동로터리.여운형활동터,127.001744,37.586243
...,...,...,...,...
11267,161020572,인천공항T2-회전구간(가상),126.429872,37.472970
11268,164020604,연수JC(가상),126.630492,37.409260
11269,164020605,연수JC(가상),126.630545,37.409618
11270,161000611,인천공항T2-3층,126.434431,37.466457


In [51]:
bus_station_concat = pd.concat([bus_station, bus_station_2])
bus_station_concat

Unnamed: 0,정류장번호,정류장명,위도,경도
0,100000001,종로2가사거리(중),37.569783,126.987733
1,100000002,창경궁.서울대학교병원(중),37.579233,126.996567
2,100000003,명륜3가.성대입구(중),37.582700,126.998333
3,100000004,종로2가.삼일교(중),37.568683,126.987533
4,100000005,혜화동로터리.여운형활동터(중),37.586233,127.001750
...,...,...,...,...
11267,161020572,인천공항T2-회전구간(가상),37.472970,126.429872
11268,164020604,연수JC(가상),37.409260,126.630492
11269,164020605,연수JC(가상),37.409618,126.630545
11270,161000611,인천공항T2-3층,37.466457,126.434431


In [52]:
bus_station_concat.drop_duplicates(subset='정류장번호')

Unnamed: 0,정류장번호,정류장명,위도,경도
0,100000001,종로2가사거리(중),37.569783,126.987733
1,100000002,창경궁.서울대학교병원(중),37.579233,126.996567
2,100000003,명륜3가.성대입구(중),37.582700,126.998333
3,100000004,종로2가.삼일교(중),37.568683,126.987533
4,100000005,혜화동로터리.여운형활동터(중),37.586233,127.001750
...,...,...,...,...
11265,164000616,쉐라톤인천호텔,37.389507,126.643765
11266,161000333,공항입구JC,37.505653,126.508723
11267,161020572,인천공항T2-회전구간(가상),37.472970,126.429872
11268,164020604,연수JC(가상),37.409260,126.630492


In [53]:
temp_station = result_bus_time.copy()
temp_station = temp_station.drop(columns=["노선번호", "막차시간", "노선명", "순번", "정류소명",	"경도"])
temp_station

Unnamed: 0,정류장번호,위도
0,100000419,37.576579
1,100000415,37.578797
2,100000416,37.582747
3,100000417,37.582911
4,100000418,37.579532
...,...,...
10484,124000457,37.570459
10485,124000458,37.571724
10486,124900130,37.572596
10487,124900128,37.574029


In [54]:
temp_station = temp_station.drop_duplicates()
temp_station = temp_station.drop(columns=["위도"])

temp_station

Unnamed: 0,정류장번호
0,100000419
1,100000415
2,100000416
3,100000417
4,100000418
...,...
10469,124900120
10480,124900067
10486,124900130
10487,124900128


In [55]:
temp_station = temp_station.astype({"정류장번호": int})
bus_station_concat = bus_station.astype({"정류장번호": int})

In [56]:
result_bus_station = pd.merge(
    temp_station, bus_station_concat, on="정류장번호", how="left")
result_bus_station

Unnamed: 0,정류장번호,정류장명,위도,경도
0,100000419,,,
1,100000415,국립고궁박물관,37.578797,126.974046
2,100000416,청와대,37.582747,126.973778
3,100000417,춘추문,37.582911,126.979731
4,100000418,경복궁.국립민속박물관,37.579532,126.979500
...,...,...,...,...
8123,124900120,고덕1동주민센터.두레근린공원,37.558546,127.151715
8124,124900067,강일리버파크6단지610동,37.565064,127.175907
8125,124900130,강동리버스트8단지,37.572596,127.176109
8126,124900128,강빛초.중학교,37.574029,127.172385


In [57]:
result_bus_station.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8128 entries, 0 to 8127
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   정류장번호   8128 non-null   int32  
 1   정류장명    7959 non-null   object 
 2   위도      7959 non-null   float64
 3   경도      7959 non-null   float64
dtypes: float64(2), int32(1), object(1)
memory usage: 285.8+ KB


In [58]:
result_bus_station = result_bus_station.dropna()
result_bus_station

Unnamed: 0,정류장번호,정류장명,위도,경도
1,100000415,국립고궁박물관,37.578797,126.974046
2,100000416,청와대,37.582747,126.973778
3,100000417,춘추문,37.582911,126.979731
4,100000418,경복궁.국립민속박물관,37.579532,126.979500
5,108000382,영신여객차고지(우이동종점),37.662829,127.011966
...,...,...,...,...
8123,124900120,고덕1동주민센터.두레근린공원,37.558546,127.151715
8124,124900067,강일리버파크6단지610동,37.565064,127.175907
8125,124900130,강동리버스트8단지,37.572596,127.176109
8126,124900128,강빛초.중학교,37.574029,127.172385


In [61]:
result_bus_station.to_csv('result_bus_station.csv',
                          index=False, encoding="utf-8-sig")

In [67]:
result_bus_time_2 = pd.merge(result_bus_station, result_bus_time,  on='정류장번호', how='left')

result_bus_time_2

Unnamed: 0,정류장번호,정류장명,위도_x,경도_x,노선번호,막차시간,노선명,순번,정류소명,경도_y,위도_y
0,100000415,국립고궁박물관,37.578797,126.974046,100000020,1648,청와대A01(자율주행),2.0,국립고궁박물관,126.974046,37.578797
1,100000416,청와대,37.582747,126.973778,100000020,1649,청와대A01(자율주행),3.0,청와대,126.973778,37.582747
2,100000417,춘추문,37.582911,126.979731,100000020,1650,청와대A01(자율주행),4.0,춘추문,126.979731,37.582911
3,100000418,경복궁.국립민속박물관,37.579532,126.979500,100000020,1650,청와대A01(자율주행),5.0,경복궁.국립민속박물관,126.979500,37.579532
4,108000382,영신여객차고지(우이동종점),37.662829,127.011966,100100014,2320,109,1.0,영신여객차고지(우이동종점),127.011966,37.662829
...,...,...,...,...,...,...,...,...,...,...,...
15352,124900120,고덕1동주민센터.두레근린공원,37.558546,127.151715,124900003,0031,강동02,58.0,고덕1동주민센터.두레근린공원,127.151715,37.558546
15353,124900067,강일리버파크6단지610동,37.565064,127.175907,124900003,0045,강동02,69.0,강일리버파크6단지610동,127.175907,37.565064
15354,124900130,강동리버스트8단지,37.572596,127.176109,124900003,0053,강동02,75.0,강동리버스트8단지,127.176109,37.572596
15355,124900128,강빛초.중학교,37.574029,127.172385,124900003,0056,강동02,76.0,강빛초.중학교,127.172385,37.574029


In [68]:
result_bus_time_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15357 entries, 0 to 15356
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   정류장번호   15357 non-null  int32  
 1   정류장명    15357 non-null  object 
 2   위도_x    15357 non-null  float64
 3   경도_x    15357 non-null  float64
 4   노선번호    15357 non-null  int64  
 5   막차시간    15357 non-null  object 
 6   노선명     15357 non-null  object 
 7   순번      15357 non-null  float64
 8   정류소명    15357 non-null  object 
 9   경도_y    15357 non-null  float64
 10  위도_y    15357 non-null  float64
dtypes: float64(5), int32(1), int64(1), object(4)
memory usage: 1.3+ MB


In [69]:
result_bus_time_2.to_csv('result_bus_per_station.csv',
                           index=False, encoding="utf-8-sig")

### 3. 버스 막차 시간표 추려내기

In [78]:
bus_term = pd.read_csv('../bus/bus_term.csv', encoding='cp949')
bus_term = bus_term.rename(columns={
                           "route_name": "노선명", "route_id": "노선번호", "day": "평일", "sat": "토요일", "sun_holiday": "휴일"})

bus_term

Unnamed: 0,노선명,노선번호,평일,토요일,휴일
0,TOUR11,100000017,40.0,40.0,40.0
1,TOUR12,100000018,65.0,66.0,66.0
2,청와대A01(자율주행),100000020,15.0,,
3,01,100100001,8.0,9.0,9.0
4,101,100100006,10.0,14.0,14.0
...,...,...,...,...,...
661,342,124000038,11.0,12.0,14.0
662,3323,124000039,15.0,20.0,20.0
663,강동05,124900001,12.0,14.0,14.0
664,강동01,124900002,9.0,11.0,13.0


In [86]:
temp_time = result_bus_time.copy()
temp_time = temp_time.drop(
    columns=["정류장번호", "막차시간", "순번", "정류소명", "경도", "위도"])
temp_time = temp_time.drop_duplicates()
temp_time = temp_time.reset_index(drop=True)

temp_time

Unnamed: 0,노선번호,노선명
0,100000020,청와대A01(자율주행)
1,100100014,109
2,100100017,120
3,100100046,270
4,100100047,271
...,...,...
174,124000038,342
175,124000039,3323
176,124900001,강동05
177,124900002,강동01


In [90]:
result_bus_term = pd.merge(temp_time, bus_term, on=["노선번호"], how="left")

result_bus_term

Unnamed: 0,노선번호,노선명_x,노선명_y,평일,토요일,휴일
0,100000020,청와대A01(자율주행),청와대A01(자율주행),15.0,,
1,100100014,109,109,14.0,14.0,16.0
2,100100017,120,120,6.0,10.0,10.0
3,100100046,270,270,10.0,11.0,12.0
4,100100047,271,271,6.0,10.0,10.0
...,...,...,...,...,...,...
174,124000038,342,342,11.0,12.0,14.0
175,124000039,3323,3323,15.0,20.0,20.0
176,124900001,강동05,강동05,12.0,14.0,14.0
177,124900002,강동01,강동01,9.0,11.0,13.0


In [91]:
result_bus_term.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 179 entries, 0 to 178
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   노선번호    179 non-null    int64  
 1   노선명_x   179 non-null    object 
 2   노선명_y   174 non-null    object 
 3   평일      174 non-null    float64
 4   토요일     167 non-null    float64
 5   휴일      167 non-null    float64
dtypes: float64(3), int64(1), object(2)
memory usage: 9.8+ KB


In [92]:
result_bus_term[result_bus_term["노선명_y"].isnull()]

Unnamed: 0,노선번호,노선명_x,노선명_y,평일,토요일,휴일
34,100100417,6008,,,,
79,106000004,8001,,,,
131,115000009,N6002,,,,
172,124000014,6007,,,,
173,124000015,N6001,,,,


In [93]:
# 수동 채우기 

result_bus_term.loc[34] = [100100417, 6008, np.NaN, 70, 70, 70]
result_bus_term = result_bus_term.drop([79, 172])
result_bus_term.loc[131] = [115000009, 'N6002', np.NaN, 20, 20, 20]
result_bus_term.loc[173] = [100100417, 'N6001', np.NaN, 20, 20, 20]

In [94]:
result_bus_term = result_bus_term.drop(columns=["노선명_y"])
result_bus_term = result_bus_term.rename(columns={"노선명_x": "노선명"})

result_bus_term.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 177 entries, 0 to 178
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   노선번호    177 non-null    int64  
 1   노선명     177 non-null    object 
 2   평일      177 non-null    float64
 3   토요일     170 non-null    float64
 4   휴일      170 non-null    float64
dtypes: float64(3), int64(1), object(1)
memory usage: 12.4+ KB


In [95]:
result_bus_term[result_bus_term["토요일"].isnull()]

# 확인 결과, 주말에 운행하지 않는 노선들

Unnamed: 0,노선번호,노선명,평일,토요일,휴일
0,100000020,청와대A01(자율주행),15.0,,
97,110000004,8146,5.0,,
98,110000005,8101,7.0,,
107,111000018,8701,10.0,,
114,112000003,8773,43.0,,
120,113000003,8762,16.0,,
140,116000005,8561,11.0,,


In [97]:
result_bus_term.to_csv("result_bus_term.csv",
                       index=False, encoding="utf-8-sig")


In [100]:
result_bus_time_2[(result_bus_time_2["노선번호"] ==
                   106000004) | (result_bus_time_2["노선번호"] == 124000014)]

Unnamed: 0,정류장번호,정류장명,위도_x,경도_x,노선번호,막차시간,노선명,순번,정류소명,경도_y,위도_y
1890,101000054,충무로역2번출구.대한극장앞,37.561223,126.994917,106000004,0,8001,2.0,충무로역2번출구.대한극장앞,126.994918,37.561223
7383,161000612,인천공항B1층입국장(T2),37.468917,126.434333,124000014,2300,6007,1.0,인천공항T2-B1층,126.434018,37.466505
7415,161000612,인천공항T2-B1층,37.469306,126.43485,124000014,2300,6007,1.0,인천공항T2-B1층,126.434018,37.466505
7447,161000612,인천공항T2-B1층,37.466505,126.434018,124000014,2300,6007,1.0,인천공항T2-B1층,126.434018,37.466505
7475,129000083,인천국제공항,37.448583,126.451483,124000014,2320,6007,2.0,인천공항T1-1층,126.451311,37.448799
7507,129000083,인천공항T1-1층,37.448799,126.45131,124000014,2320,6007,2.0,인천공항T1-1층,126.451311,37.448799
7580,161000325,인천공항T1-3층,37.448483,126.450983,124000014,50,6007,41.0,인천공항T1-3층,126.450156,37.445732
7608,161000325,인천공항T1-3층,37.448535,126.450987,124000014,50,6007,41.0,인천공항T1-3층,126.450156,37.445732
7636,161000325,인천공항T1-3층,37.445732,126.450156,124000014,50,6007,41.0,인천공항T1-3층,126.450156,37.445732
7664,161000611,인천공항3층출국장(T2),37.46875,126.43455,124000014,102,6007,42.0,인천공항T2-3층,126.434431,37.466457


In [102]:
result_bus_time_2 = result_bus_time_2[(result_bus_time_2["노선번호"] !=
                   106000004) & (result_bus_time_2["노선번호"] != 124000014)]
result_bus_time_2

Unnamed: 0,정류장번호,정류장명,위도_x,경도_x,노선번호,막차시간,노선명,순번,정류소명,경도_y,위도_y
0,100000415,국립고궁박물관,37.578797,126.974046,100000020,1648,청와대A01(자율주행),2.0,국립고궁박물관,126.974046,37.578797
1,100000416,청와대,37.582747,126.973778,100000020,1649,청와대A01(자율주행),3.0,청와대,126.973778,37.582747
2,100000417,춘추문,37.582911,126.979731,100000020,1650,청와대A01(자율주행),4.0,춘추문,126.979731,37.582911
3,100000418,경복궁.국립민속박물관,37.579532,126.979500,100000020,1650,청와대A01(자율주행),5.0,경복궁.국립민속박물관,126.979500,37.579532
4,108000382,영신여객차고지(우이동종점),37.662829,127.011966,100100014,2320,109,1.0,영신여객차고지(우이동종점),127.011966,37.662829
...,...,...,...,...,...,...,...,...,...,...,...
15352,124900120,고덕1동주민센터.두레근린공원,37.558546,127.151715,124900003,0031,강동02,58.0,고덕1동주민센터.두레근린공원,127.151715,37.558546
15353,124900067,강일리버파크6단지610동,37.565064,127.175907,124900003,0045,강동02,69.0,강일리버파크6단지610동,127.175907,37.565064
15354,124900130,강동리버스트8단지,37.572596,127.176109,124900003,0053,강동02,75.0,강동리버스트8단지,127.176109,37.572596
15355,124900128,강빛초.중학교,37.574029,127.172385,124900003,0056,강동02,76.0,강빛초.중학교,127.172385,37.574029


In [103]:
result_bus_time_2[(result_bus_time_2["노선번호"] ==
                   106000004) | (result_bus_time_2["노선번호"] == 124000014)]

Unnamed: 0,정류장번호,정류장명,위도_x,경도_x,노선번호,막차시간,노선명,순번,정류소명,경도_y,위도_y


In [104]:
result_bus_time_2.to_csv("result_bus_per_station.csv", index=False, encoding="utf-8-sig")

### 3. 모든 역데이터에 geohash 적용

In [73]:
train_station = train_station.rename(columns={"역사명": "역이름", "역코드": "역번호"})
train_station = train_station.drop(columns=["주소", "노선명"])

train_station

Unnamed: 0,역이름,위도,경도,역번호
0,까치울,37.506130,126.810930,751.0
1,부천종합운동장,37.505020,126.796610,752.0
2,춘의,37.503650,126.788280,753.0
3,신중동,37.502820,126.775660,754.0
4,부천시청,37.504440,126.763640,755.0
...,...,...,...,...
643,한남,37.529430,127.009169,196.0
644,옥수,37.540446,127.018672,195.0
645,응봉,37.549946,127.034538,193.0
646,까치산,37.531394,126.846987,264.0


In [108]:
result_bus_station = result_bus_station.reset_index(drop=True)
result_bus_station = result_bus_station.rename(columns={"정류장명": "역이름", "정류장번호": "역번호"})

result_bus_station

Unnamed: 0,역번호,역이름,위도,경도
0,100000415,국립고궁박물관,37.578797,126.974046
1,100000416,청와대,37.582747,126.973778
2,100000417,춘추문,37.582911,126.979731
3,100000418,경복궁.국립민속박물관,37.579532,126.979500
4,108000382,영신여객차고지(우이동종점),37.662829,127.011966
...,...,...,...,...
7954,124900120,고덕1동주민센터.두레근린공원,37.558546,127.151715
7955,124900067,강일리버파크6단지610동,37.565064,127.175907
7956,124900130,강동리버스트8단지,37.572596,127.176109
7957,124900128,강빛초.중학교,37.574029,127.172385


In [109]:
result_all_station = pd.concat([result_bus_station, train_station])
result_all_station

Unnamed: 0,역번호,역이름,위도,경도
0,100000415.0,국립고궁박물관,37.578797,126.974046
1,100000416.0,청와대,37.582747,126.973778
2,100000417.0,춘추문,37.582911,126.979731
3,100000418.0,경복궁.국립민속박물관,37.579532,126.979500
4,108000382.0,영신여객차고지(우이동종점),37.662829,127.011966
...,...,...,...,...
643,196.0,한남,37.529430,127.009169
644,195.0,옥수,37.540446,127.018672
645,193.0,응봉,37.549946,127.034538
646,264.0,까치산,37.531394,126.846987


In [115]:
import geohash2

In [117]:
result_all_station["geohash"] = np.NaN
result_all_station = result_all_station.reset_index(drop=True)

result_all_station

Unnamed: 0,역번호,역이름,위도,경도,geohash
0,100000415.0,국립고궁박물관,37.578797,126.974046,
1,100000416.0,청와대,37.582747,126.973778,
2,100000417.0,춘추문,37.582911,126.979731,
3,100000418.0,경복궁.국립민속박물관,37.579532,126.979500,
4,108000382.0,영신여객차고지(우이동종점),37.662829,127.011966,
...,...,...,...,...,...
8602,196.0,한남,37.529430,127.009169,
8603,195.0,옥수,37.540446,127.018672,
8604,193.0,응봉,37.549946,127.034538,
8605,264.0,까치산,37.531394,126.846987,


In [118]:
for i in range(len(result_all_station)):
    result_all_station.loc[i, "geohash"] = geohash2.encode(
        result_all_station.loc[i, "위도"], result_all_station.loc[i, "경도"], precision=7)

In [119]:
result_all_station

Unnamed: 0,역번호,역이름,위도,경도,geohash
0,100000415.0,국립고궁박물관,37.578797,126.974046,wydmc35
1,100000416.0,청와대,37.582747,126.973778,wydmc3e
2,100000417.0,춘추문,37.582911,126.979731,wydmc3z
3,100000418.0,경복궁.국립민속박물관,37.579532,126.979500,wydmc3p
4,108000382.0,영신여객차고지(우이동종점),37.662829,127.011966,wydq60r
...,...,...,...,...,...
8602,196.0,한남,37.529430,127.009169,wydmd0j
8603,195.0,옥수,37.540446,127.018672,wydmd6h
8604,193.0,응봉,37.549946,127.034538,wydmdez
8605,264.0,까치산,37.531394,126.846987,wydjw2r


In [120]:
result_all_station.to_csv("result_all_station.csv",
                          index=False, encoding="utf-8-sig")

In [2]:
result_all_station = pd.read_csv("result_all_station.csv")

In [4]:
result_train = result_all_station[result_all_station["stat_id"] < 100000000]
result_bus = result_all_station[result_all_station["stat_id"] >= 100000000]

In [8]:
result_train
result_train.to_csv("result_all_train_station.csv",
                    index=False, encoding="utf-8-sig")

In [9]:
result_bus
result_bus.to_csv("result_all_bus_station.csv",
                    index=False, encoding="utf-8-sig")