# 서울시 버스 정류소 위치 표시

# 1 준비작업

## 1.1 Colab 과 GDrive 연동


In [0]:
use_colab = True

In [165]:
if use_colab == True:
  from google.colab import drive
  drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## 1.2 Import

In [0]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster, MiniMap

# 2 지도 표시

## 2.1 버스

#### 2.1.1 버스 정류소 데이타 가져오기

In [168]:
seoul_bus_stations = '/content/gdrive/My Drive/kaggle/data/seoul_bus_coordinate.csv'
bus_boarding_file = '/content/gdrive/My Drive/kaggle/data/BUS_STATION_BOARDING_MONTH_201912_1.csv'
print(seoul_bus_stations)

# utf-8 오류 발생하여 CP949 로 교체함
#bus_stations = pd.read_csv(seoul_bus_stations, encoding = 'utf-8')
bus_stations = pd.read_csv(seoul_bus_stations, encoding ='CP949',dtype='str')
bus_boarding = pd.read_csv(bus_boarding_file, encoding ='CP949')

/content/gdrive/My Drive/kaggle/data/seoul_bus_coordinate.csv


In [169]:
bus_stations.head()

Unnamed: 0,정류소번호,정류소명,X좌표,Y좌표
0,1001,종로2가사거리,126.9877498816,37.5697651251
1,1002,창경궁.서울대학교병원,126.9965660023,37.5791830159
2,1003,명륜3가.성대입구,126.9983401004,37.5826711749
3,1004,종로2가.삼일교,126.9876130976,37.5685792736
4,1005,혜화동로터리,127.001744,37.586243


In [170]:
bus_boarding.head()
bus_boarding[bus_boarding['버스정류장ARS번호'] == '01001']

Unnamed: 0,사용일자,노선ID,노선번호,노선명,표준버스정류장ID,버스정류장ARS번호,역ID,역명,승차총승객수,하차총승객수,등록일자
3134,20191201,11410003,470,470번(상암차고지~안골마을),100000001,01001,8501518,종로2가사거리,128,138,20191204
6126,20191201,41110152,N37,N37번(송파공영차고지~진관공영차고지),100000001,01001,8501518,종로2가사거리,4,15,20191204
6273,20191201,41110153,N37,N37번(진관공영차고지~송파공영차고지),100000001,01001,8501518,종로2가사거리,7,9,20191204
13308,20191201,11110434,741,741번(진관차고지~헌인릉입구),100000001,01001,8501518,종로2가사거리,73,120,20191204
38640,20191202,11410003,470,470번(상암차고지~안골마을),100000001,01001,8501518,종로2가사거리,363,283,20191205
...,...,...,...,...,...,...,...,...,...,...,...
1144044,20191230,11110434,741,741번(진관차고지~헌인릉입구),100000001,01001,8501518,종로2가사거리,261,296,20200102
1155994,20191231,11410003,470,470번(상암차고지~안골마을),100000001,01001,8501518,종로2가사거리,339,394,20200103
1161929,20191231,41110152,N37,N37번(송파공영차고지~진관공영차고지),100000001,01001,8501518,종로2가사거리,3,26,20200103
1162090,20191231,41110153,N37,N37번(진관공영차고지~송파공영차고지),100000001,01001,8501518,종로2가사거리,20,15,20200103


### 2.1.2 지도 표시용 데이타 변환

지도에 표시하기 위해서 필요한 위도, 경도, 정류소 이름을 추출한다.

In [0]:
loc_data = bus_stations[['X좌표','Y좌표','정류소번호','정류소명']].drop_duplicates(keep = 'first')
loc_data = bus_stations.groupby(['정류소번호'])['X좌표','Y좌표','정류소명'].max().reset_index()

In [0]:
loc_data.reset_index(inplace = True) 

In [173]:
loc_data.head()

Unnamed: 0,index,정류소번호,X좌표,Y좌표,정류소명
0,0,1001,126.9877498816,37.5697651251,종로2가사거리
1,1,1002,126.9965660023,37.5791830159,창경궁.서울대학교병원
2,2,1003,126.9983401004,37.5826711749,명륜3가.성대입구
3,3,1004,126.9876130976,37.5685792736,종로2가.삼일교
4,4,1005,127.001744,37.586243,혜화동로터리


In [174]:
loc_data.shape

(11018, 5)

In [0]:
loc_data2 = bus_boarding.groupby(['사용일자','버스정류장ARS번호'])['역명','승차총승객수','하차총승객수'].sum().reset_index()

In [176]:
loc_data2[loc_data2['버스정류장ARS번호']=='01001']

Unnamed: 0,사용일자,버스정류장ARS번호,승차총승객수,하차총승객수
0,20191201,1001,212,282
12667,20191202,1001,624,586
25353,20191203,1001,614,597
38033,20191204,1001,643,623
50718,20191205,1001,597,586
63405,20191206,1001,546,663
76088,20191207,1001,341,560
88772,20191208,1001,216,311
101435,20191209,1001,614,569
114120,20191210,1001,623,577


In [177]:
bus_data = loc_data.set_index('정류소번호').join(loc_data2.set_index('버스정류장ARS번호'))
bus_data

Unnamed: 0,index,X좌표,Y좌표,정류소명,사용일자,승차총승객수,하차총승객수
01001,0,126.9877498816,37.5697651251,종로2가사거리,20191201.0,212.0,282.0
01001,0,126.9877498816,37.5697651251,종로2가사거리,20191202.0,624.0,586.0
01001,0,126.9877498816,37.5697651251,종로2가사거리,20191203.0,614.0,597.0
01001,0,126.9877498816,37.5697651251,종로2가사거리,20191204.0,643.0,623.0
01001,0,126.9877498816,37.5697651251,종로2가사거리,20191205.0,597.0,586.0
...,...,...,...,...,...,...,...
25990,11017,127.1285487091,37.5533184173,암사삼성.현대아파트,20191227.0,116.0,68.0
25990,11017,127.1285487091,37.5533184173,암사삼성.현대아파트,20191228.0,84.0,49.0
25990,11017,127.1285487091,37.5533184173,암사삼성.현대아파트,20191229.0,84.0,24.0
25990,11017,127.1285487091,37.5533184173,암사삼성.현대아파트,20191230.0,141.0,46.0


In [374]:
bus_data_20191201 = bus_data[bus_data['사용일자']==20191201.0]
bus_data_20191201[['X좌표','Y좌표']] == bus_data_20191201[['X좌표','Y좌표']].astype(str)
bus_data_20191201[['사용일자','승차총승객수','하차총승객수']] == bus_data_20191201[['사용일자','승차총승객수','하차총승객수']].astype(str)


Unnamed: 0,사용일자,승차총승객수,하차총승객수
01001,False,False,False
01002,False,False,False
01003,False,False,False
01004,False,False,False
01005,False,False,False
...,...,...,...
25752,False,False,False
25753,False,False,False
25758,False,False,False
25760,False,False,False


## 2.2 지하철

### 2.2.1 지하철 데이타 가져오기

In [335]:
subway_data = '/content/gdrive/My Drive/kaggle/data/subway_addr_comp.csv'
subway_boarding_file = '/content/gdrive/My Drive/kaggle/data/CARD_SUBWAY_MONTH_201912.csv'
print(subway_data)

# utf-8 오류 발생하여 CP949 로 교체함
subway_stations = pd.read_csv(subway_data, encoding ='UTF-8')
subway_boarding = pd.read_csv(subway_boarding_file, encoding ='CP949')

/content/gdrive/My Drive/kaggle/data/subway_addr_comp.csv


In [336]:
print(subway_stations.head())
print(subway_boarding.head())
subway_stations

   Unnamed: 0  sub_station_code  ...   admin_dong    admin_code
0           0              2818  ...         가락1동  1.171063e+09
1           1               340  ...         가락1동  1.171063e+09
2           2              2535  ...  종로1.2.3.4가동  1.111062e+09
3           3               319  ...  종로1.2.3.4가동  1.111062e+09
4           4               153  ...  종로1.2.3.4가동  1.111062e+09

[5 rows x 10 columns]
       사용일자  노선명  역ID    역명  승차총승객수  하차총승객수      등록일자
0  20191201  1호선  152    종각   19093   17141  20191204
1  20191201  1호선  153  종로3가   19646   17772  20191204
2  20191201  1호선  154  종로5가   13716   13149  20191204
3  20191201  1호선  155   동대문   11040   13079  20191204
4  20191201  1호선  156   신설동    8498    8322  20191204


Unnamed: 0.1,Unnamed: 0,sub_station_code,station_name,line_no,x_wgs,y_wgs,raw_dong,raw_code,admin_dong,admin_code
0,0,2818,가락시장,8,127.118234,37.492522,가락동,1.171011e+09,가락1동,1.171063e+09
1,1,340,가락시장,3,127.118234,37.492522,가락동,1.171011e+09,가락1동,1.171063e+09
2,2,2535,종로3가,5,126.991806,37.571607,묘동,1.111015e+09,종로1.2.3.4가동,1.111062e+09
3,3,319,종로3가,3,126.991806,37.571607,묘동,1.111015e+09,종로1.2.3.4가동,1.111062e+09
4,4,153,종로3가,1,126.991806,37.571607,묘동,1.111015e+09,종로1.2.3.4가동,1.111062e+09
...,...,...,...,...,...,...,...,...,...,...
689,924,4505,동백,E,127.152716,37.269043,중동,4.146312e+09,동백2동,4.146358e+09
690,925,4506,초당,E,127.159443,37.260752,중동,4.146312e+09,동백2동,4.146358e+09
691,926,4508,삼가,E,127.168075,37.242115,삼가동,4.146110e+09,역삼동,4.146152e+09
692,927,4509,시청?용인대,E,127.178406,37.239151,삼가동,4.146110e+09,,


### 2.2.2 지도 표시용 데이타 변환

In [0]:
sub_data = subway_stations[['x_wgs','y_wgs','sub_station_code','station_name']].drop_duplicates(keep = 'first')
sub_data = sub_data.groupby(['station_name'])['x_wgs','y_wgs','sub_station_code'].min().reset_index()

In [0]:
sub_data.reset_index(inplace = True) 

In [339]:
subway_boarding[subway_boarding['역명']=='서울역']

Unnamed: 0,사용일자,노선명,역ID,역명,승차총승객수,하차총승객수,등록일자
109,20191201,4호선,426,서울역,6877,18251,20191204
118,20191201,경부선,1001,서울역,8387,1548,20191204
298,20191201,1호선,150,서울역,41057,38106,20191204
300,20191201,경의선,1251,서울역,1405,1312,20191204
550,20191201,공항철도 1호선,4201,서울역,15117,12342,20191204
...,...,...,...,...,...,...,...
17888,20191231,4호선,426,서울역,14197,26906,20200103
17897,20191231,경부선,1001,서울역,7692,1998,20200103
18078,20191231,경의선,1251,서울역,4042,5810,20200103
18103,20191231,경의선,1291,서울역,1,0,20200103


In [0]:
sub_data2 = subway_boarding.groupby(by=['사용일자','역명'])['역ID','승차총승객수','하차총승객수'].agg({'역ID':'min', '승차총승객수':'sum','하차총승객수':'sum'}).reset_index()
#sub_data2['역ID'] = sub_data2['역ID'].astype(str)

In [345]:
sub_data2[sub_data2['역명']=='서울역']

Unnamed: 0,사용일자,역명,역ID,승차총승객수,하차총승객수
247,20191201,서울역,150,72843,71559
756,20191202,서울역,150,98170,97230
1265,20191203,서울역,150,96569,95915
1774,20191204,서울역,150,97310,96390
2283,20191205,서울역,150,102406,101714
2792,20191206,서울역,150,116317,116789
3301,20191207,서울역,150,93935,99374
3810,20191208,서울역,150,73557,70501
4319,20191209,서울역,150,100433,99674
4828,20191210,서울역,150,98348,97545


In [346]:
sub_data[sub_data['station_name']=='서울']
#subway_stations[subway_stations['station_name']=='서울']

Unnamed: 0,index,station_name,x_wgs,y_wgs,sub_station_code
288,288,서울,126.972559,37.554648,150


In [347]:
subway_data = sub_data2.set_index('역ID').join(sub_data.set_index('sub_station_code'))
subway_data.describe()
subway_data[subway_data['역명']=='서울역']
#sub_data[sub_data['sub_station_code']=='152']
#sub_data2[sub_data2['역ID']=='152']

Unnamed: 0,사용일자,역명,승차총승객수,하차총승객수,index,station_name,x_wgs,y_wgs
150,20191201,서울역,72843,71559,288.0,서울,126.972559,37.554648
150,20191202,서울역,98170,97230,288.0,서울,126.972559,37.554648
150,20191203,서울역,96569,95915,288.0,서울,126.972559,37.554648
150,20191204,서울역,97310,96390,288.0,서울,126.972559,37.554648
150,20191205,서울역,102406,101714,288.0,서울,126.972559,37.554648
150,20191206,서울역,116317,116789,288.0,서울,126.972559,37.554648
150,20191207,서울역,93935,99374,288.0,서울,126.972559,37.554648
150,20191208,서울역,73557,70501,288.0,서울,126.972559,37.554648
150,20191209,서울역,100433,99674,288.0,서울,126.972559,37.554648
150,20191210,서울역,98348,97545,288.0,서울,126.972559,37.554648


In [370]:
subway_data_20191201 = subway_data[subway_data['사용일자']==20191201]
subway_data_20191201 = subway_data_20191201.dropna()
subway_data_20191201.head()

Unnamed: 0,사용일자,역명,승차총승객수,하차총승객수,index,station_name,x_wgs,y_wgs
150,20191201,서울역,72843,71559,288.0,서울,126.972559,37.554648
151,20191201,시청,19647,16950,340.0,시청,126.977108,37.564718
152,20191201,종각,19093,17141,500.0,종각,126.982923,37.570161
153,20191201,종로3가,35165,33184,501.0,종로3가,126.991806,37.571607
154,20191201,종로5가,13716,13149,502.0,종로5가,127.001849,37.570926


## 2.3 지도에 정류소 표시

itertuples 를 사용하여 모든 정류소를 표시한다.

In [0]:
# 서울시청 위도, 경도
latitude_value = 37.565954
longitude_value = 126.978023

map_hs = folium.Map((latitude_value, longitude_value), zoom_start = 15)

In [0]:
#승차, 하차 별로 그룹

mcg = folium.plugins.MarkerCluster(control=False)
bus_get_on = folium.plugins.FeatureGroupSubGroup(mcg,'bus_get_on')
bus_get_off = folium.plugins.FeatureGroupSubGroup(mcg,'bus_get_off')
subway_get_on = folium.plugins.FeatureGroupSubGroup(mcg,'subway_get_on')
subway_get_off = folium.plugins.FeatureGroupSubGroup(mcg,'subway_get_off')

In [0]:
for row in bus_data_20191201.itertuples():
  x = float(row.X좌표)
  y = float(row.Y좌표)
  get_on = '승차: ' + str(row.승차총승객수)
  get_off = '하차: ' + str(row.하차총승객수)
  

  map_hs.add_child(mcg)
  map_hs.add_child(bus_get_on)
  map_hs.add_child(bus_get_off)

  get_on_icon=folium.Icon(color='green', icon='angle-up', icon_color="white", prefix='fa')
  get_off_icon=folium.Icon(color='red', icon='angle-down', icon_color="white", prefix='fa')

  bus_get_on.add_child(folium.Marker([y,x],icon=get_on_icon,popup = get_on))
  bus_get_off.add_child(folium.Marker([y,x],icon=get_off_icon,popup = get_off))

In [0]:
for row in subway_data_20191201.itertuples():
  x = float(row.x_wgs)
  y = float(row.y_wgs)
  get_on = '승차: ' + str(row.승차총승객수)
  get_off = '하차: ' + str(row.하차총승객수)

  map_hs.add_child(subway_get_on)
  map_hs.add_child(subway_get_off)

  get_on_icon=folium.Icon(color='blue', icon='angle-up', icon_color="white", prefix='fa')
  get_off_icon=folium.Icon(color='yellow', icon='angle-down', icon_color="white", prefix='fa')

  subway_get_on.add_child(folium.Marker([y,x],icon=get_on_icon,popup = get_on))
  subway_get_off.add_child(folium.Marker([y,x],icon=get_off_icon,popup = get_off))

In [379]:
folium.LayerControl().add_to(map_hs)

<folium.map.LayerControl at 0x7f68d419bac8>

## 2.4 지표 표시 내용을 html 로 저장

In [0]:
map_hs.save('stations.html')

Folium 을 사용할 때 하얗게 출력되면 표시할 것이 많은 경우이므로 html로 저장해서 확인하면 된다.