In [1]:
# import Libraries

import pandas as pd
import numpy as np
import os
import folium

In [2]:
# read csv file
# original data url : https://data.seoul.go.kr/dataList/datasetView.do?infId=OA-12914&srvType=S&serviceKind=1&currentPageNo=1
data_ = pd.read_csv('seoul_subway_data_utf8.csv')
data_.head()

Unnamed: 0,line,station,on,off
0,중앙선,지평,50,41
1,중앙선,용문,3095,3027
2,중앙선,원덕,455,404
3,중앙선,양평,3533,3631
4,중앙선,오빈,354,348


In [3]:
# extract station name as set
station_name = set(data_['station'])

In [4]:
#making dictionary for total amount of passenger at stations
totals = {'station':[],
          'on':[],
          'off':[]}
for s in station_name:
    on = data_[data_['station']==s]['on'].sum()
    off = data_[data_['station']==s]['off'].sum()
    totals['station'].append(s)
    totals['on'].append(on)
    totals['off'].append(off)

In [5]:
total_df = pd.DataFrame(totals)
total_df.head()

Unnamed: 0,station,on,off
0,김포공항,28331,27194
1,연신내,47859,45373
2,남춘천,2833,3097
3,제기동,23674,24198
4,퇴계원,4344,3958


In [40]:
# read location information of stations
station_loc = pd.read_csv('station_loc_utf8.csv')
station_loc.head()

Unnamed: 0,station,lat,lng
0,가락시장,37.492522,127.118234
1,종로3가,37.571607,126.991806
2,오금,37.502162,127.128111
3,동대문,37.57142,127.009745
4,동대문역사문화공원,37.565138,127.007896


In [41]:
# join dataframes
df_fin = total_df.join(station_loc.set_index('station'),on='station')
df_fin.head()

Unnamed: 0,station,on,off,lat,lng
0,김포공항,28331,27194,37.562434,126.801058
1,연신내,47859,45373,37.619001,126.921008
2,남춘천,2833,3097,37.864007,127.723792
3,제기동,23674,24198,37.578103,127.034893
4,퇴계원,4344,3958,37.648311,127.143952


In [42]:
#check NaN values
df_fin[df_fin['lat'].isnull()]

Unnamed: 0,station,on,off,lat,lng


In [43]:
print(df_fin['on'].mean())
print(df_fin['on'].max())
print(df_fin['on'].min())

16886.316733067728
114047
50


In [83]:
import math
map_on = folium.Map(location=[37.566047, 126.977702], tiles='Stamen Toner', zoom_start=15)
for i, row in df_fin.iterrows():
    location = [float(row['lat']),float(row['lng'])]
    folium.Circle(
        location = location,
        radius = int(math.log(row['on'])*25),
        color='ffffff',
        fill_opacity=0.6,
        fill_color='#4DD0E1',
        popup = 'Station : ' + row['station'] + '<br>' +
        'Get on : ' + str("{:,} person/day".format(row['on'])) + '<br>'
        'Get off : ' + str("{:,} person/day".format(row['off'])),
        fill=True, 
    ).add_to(map_on) 
map_on

In [86]:
map_off = folium.Map(location=[37.566047, 126.977702], tiles='Stamen Toner', zoom_start=15)
for i, row in df_fin.iterrows():
    location = [float(row['lat']),float(row['lng'])]
    folium.Circle(
        location = location,
        radius = int(math.log(row['off'])*25),
        color='ffffff',
        fill_opacity=0.6,
        fill_color='#f06292',
        popup = 'Station : ' + row['station'] + '<br>' +
        'Get on : ' + str("{:,} person/day".format(row['on'])) + '<br>'
        'Get off : ' + str("{:,} person/day".format(row['off'])),
        fill=True, 
    ).add_to(map_off) 
map_off