In [1]:
# import Libraries

import pandas as pd
import numpy as np
import os
import folium

In [2]:
# read csv file
# original data url : https://data.seoul.go.kr/dataList/datasetView.do?infId=OA-12914&srvType=S&serviceKind=1&currentPageNo=1
data_ = pd.read_csv('seoul_subway_data_utf8.csv')
data_.head()

Unnamed: 0,line,station,on,off
0,중앙선,지평,50,41
1,중앙선,용문,3095,3027
2,중앙선,원덕,455,404
3,중앙선,양평,3533,3631
4,중앙선,오빈,354,348


In [3]:
# extract station name as set
station_name = set(data_['station'])

In [4]:
#making dictionary for total amount of passenger at stations
totals = {'station':[],
          'on':[],
          'off':[]}
for s in station_name:
    on = data_[data_['station']==s]['on'].sum()
    off = data_[data_['station']==s]['off'].sum()
    totals['station'].append(s)
    totals['on'].append(on)
    totals['off'].append(off)

In [5]:
total_df = pd.DataFrame(totals)
total_df.head()

Unnamed: 0,station,on,off
0,불광,28260,30164
1,마곡나루,13833,13363
2,보라매,10967,11163
3,상도,14038,13955
4,사릉,4506,4313


In [8]:
# read location information of stations
station_loc = pd.read_csv('station_loc_utf8.csv')
station_loc.head()

Unnamed: 0,station,lat,lng
0,가락시장,37.492522,127.118234
1,종로3가,37.571607,126.991806
2,오금,37.502162,127.128111
3,동대문,37.57142,127.009745
4,동대문역사문화공원,37.565138,127.007896


In [9]:
# join dataframes
df_fin = total_df.join(station_loc.set_index('station'),on='station')
df_fin.head()

Unnamed: 0,station,on,off,lat,lng
0,불광,28260,30164,37.610469,126.929887
1,마곡나루,13833,13363,37.567336,126.829497
2,보라매,10967,11163,37.499872,126.920428
3,상도,14038,13955,37.502834,126.94791
4,사릉,4506,4313,37.65108,127.176933


In [10]:
#check NaN values
df_fin[df_fin['lat'].isnull()]

Unnamed: 0,station,on,off,lat,lng


In [11]:
print(df_fin['on'].mean())
max_on = df_fin['on'].max()
min_on = df_fin['on'].min()
max_off = df_fin['off'].min()
min_off = df_fin['off'].min()

16886.316733067728


In [23]:
import math
map_on = folium.Map(location=[37.566047, 126.977702], tiles='Stamen Toner', zoom_start=15)

In [20]:
for i, row in df_fin.iterrows():
    location = [float(row['lat']),float(row['lng'])]
    folium.Circle(
        location = location,
        radius = int(math.log(row['off'])*15),
        color='ffffff',
        fill_opacity=0.6,
        fill_color='#f06292',
        popup = 'Station : ' + row['station'] + '<br>' +
        'Get on : ' + str("{:,} person/day".format(row['on'])) + '<br>'
        'Get off : ' + str("{:,} person/day".format(row['off'])),
        fill=True, 
    ).add_to(map_on) 

In [24]:
for i, row in df_fin.iterrows():
    location = [float(row['lat']),float(row['lng'])]
    folium.Circle(
        location = location,
        radius = int(math.log(row['on'])*15),
        color='ffffff',
        fill_opacity=0.6,
        fill_color='#4DD0E1',
        popup = 'Station : ' + row['station'] + '<br>' +
        'Get on : ' + str("{:,} person/day".format(row['on'])) + '<br>'
        'Get off : ' + str("{:,} person/day".format(row['off'])),
        fill=True, 
    ).add_to(map_on) 

In [25]:
map_on

In [26]:
map_on.save('subway.html')