# 지하철 시간대별 데이터 시각화

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import platform
%matplotlib inline

from matplotlib import font_manager, rc
plt.rcParams['axes.unicode_minus'] = False

if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    path = 'c:/Windows/Fonts/malgun.ttf'
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
else:
    print('Unknown System... Sorry~~~')

## 1. 이용 현황 데이터 정제

### (1) 지하철 시간대별 데이터 파일로드
### (2) 데이터 중 0이 들어 있는 데이터 행 제거
### (3) '사용월', '작업일시' 컬럼 삭제
### (4) 데이터프레임에서 2번째 줄 헤더 삭제
### (5) 시간대가 포함된 열명을 '000 승차'로 변경


In [2]:
subway_raw = pd.read_csv('./data/subway_time_202403.csv', encoding='utf-8')
subway_raw.head()

Unnamed: 0,사용월,호선명,역ID,지하철역,04:00:00~04:59:59,Unnamed: 5,05:00:00~05:59:59,Unnamed: 7,06:00:00~06:59:59,Unnamed: 9,...,Unnamed: 43,00:00:00~00:59:59,Unnamed: 45,01:00:00~01:59:59,Unnamed: 47,02:00:00~02:59:59,Unnamed: 49,03:00:00~03:59:59,Unnamed: 51,작업일시
0,,,,,승차,하차,승차,하차,승차,하차,...,하차,승차,하차,승차,하차,승차,하차,승차,하차,
1,2024-03,1호선,150.0,서울역,686,25,8354,8884,13709,59387,...,15365,2922,3882,0,249,0,0,0,0,2024-04-03 10:09:27
2,2024-03,1호선,151.0,시청,90,2,2209,5148,4267,26852,...,4609,588,1325,0,44,0,0,0,0,2024-04-03 10:09:27
3,2024-03,1호선,152.0,종각,166,8,3937,5736,4593,30251,...,4695,1724,1489,0,0,0,0,0,0,2024-04-03 10:09:27
4,2024-03,1호선,153.0,종로3가,181,14,4155,2562,3762,12688,...,5336,1532,2489,0,43,0,0,0,0,2024-04-03 10:09:27


In [3]:
subway_raw.drop(['사용월','작업일시'], axis = 1, inplace=True)

In [4]:
subway_raw

Unnamed: 0,호선명,역ID,지하철역,04:00:00~04:59:59,Unnamed: 5,05:00:00~05:59:59,Unnamed: 7,06:00:00~06:59:59,Unnamed: 9,07:00:00~07:59:59,...,23:00:00~23:59:59,Unnamed: 43,00:00:00~00:59:59,Unnamed: 45,01:00:00~01:59:59,Unnamed: 47,02:00:00~02:59:59,Unnamed: 49,03:00:00~03:59:59,Unnamed: 51
0,,,,승차,하차,승차,하차,승차,하차,승차,...,승차,하차,승차,하차,승차,하차,승차,하차,승차,하차
1,1호선,150.0,서울역,686,25,8354,8884,13709,59387,43653,...,27169,15365,2922,3882,0,249,0,0,0,0
2,1호선,151.0,시청,90,2,2209,5148,4267,26852,8516,...,10115,4609,588,1325,0,44,0,0,0,0
3,1호선,152.0,종각,166,8,3937,5736,4593,30251,6932,...,27626,4695,1724,1489,0,0,0,0,0,0
4,1호선,153.0,종로3가,181,14,4155,2562,3762,12688,6021,...,17041,5336,1532,2489,0,43,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
618,신림선,4407.0,당곡,1,0,2138,222,4505,1351,13953,...,1021,4254,219,1343,0,0,0,0,0,0
619,신림선,4408.0,신림,3,0,1056,285,1484,822,4465,...,885,2061,320,685,0,3,0,0,0,0
620,신림선,4409.0,서원,0,0,2351,276,4480,800,14425,...,489,4522,114,1502,2,2,0,0,0,0
621,신림선,4410.0,서울대벤처타운,10,0,6230,539,10451,4122,32093,...,2122,12669,544,5434,0,177,0,0,0,0


In [5]:
for n in range(3, len(subway_raw.columns)):
    if 'Unnamed' in subway_raw.columns[n]:
        prev_column_name = subway_raw.columns[n-1]
        if '승차' in prev_column_name:
            prev_column_name = prev_column_name.replace(' 승차', '')
        subway_raw.rename(columns={subway_raw.columns[n]: str(prev_column_name) + ' ' + str(subway_raw.iloc[0, n])} , inplace=True)
    else:
        subway_raw.rename(columns={subway_raw.columns[n]: str(subway_raw.columns[n]) + ' ' + str(subway_raw.iloc[0, n])} , inplace=True)

subway_raw

Unnamed: 0,호선명,역ID,지하철역,04:00:00~04:59:59 승차,04:00:00~04:59:59 하차,05:00:00~05:59:59 승차,05:00:00~05:59:59 하차,06:00:00~06:59:59 승차,06:00:00~06:59:59 하차,07:00:00~07:59:59 승차,...,23:00:00~23:59:59 승차,23:00:00~23:59:59 하차,00:00:00~00:59:59 승차,00:00:00~00:59:59 하차,01:00:00~01:59:59 승차,01:00:00~01:59:59 하차,02:00:00~02:59:59 승차,02:00:00~02:59:59 하차,03:00:00~03:59:59 승차,03:00:00~03:59:59 하차
0,,,,승차,하차,승차,하차,승차,하차,승차,...,승차,하차,승차,하차,승차,하차,승차,하차,승차,하차
1,1호선,150.0,서울역,686,25,8354,8884,13709,59387,43653,...,27169,15365,2922,3882,0,249,0,0,0,0
2,1호선,151.0,시청,90,2,2209,5148,4267,26852,8516,...,10115,4609,588,1325,0,44,0,0,0,0
3,1호선,152.0,종각,166,8,3937,5736,4593,30251,6932,...,27626,4695,1724,1489,0,0,0,0,0,0
4,1호선,153.0,종로3가,181,14,4155,2562,3762,12688,6021,...,17041,5336,1532,2489,0,43,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
618,신림선,4407.0,당곡,1,0,2138,222,4505,1351,13953,...,1021,4254,219,1343,0,0,0,0,0,0
619,신림선,4408.0,신림,3,0,1056,285,1484,822,4465,...,885,2061,320,685,0,3,0,0,0,0
620,신림선,4409.0,서원,0,0,2351,276,4480,800,14425,...,489,4522,114,1502,2,2,0,0,0,0
621,신림선,4410.0,서울대벤처타운,10,0,6230,539,10451,4122,32093,...,2122,12669,544,5434,0,177,0,0,0,0


In [6]:
subway_raw.drop(0, axis = 0,  inplace =True)

In [7]:
subway_copy = subway_raw.copy()

In [8]:
subway_copy.head()

Unnamed: 0,호선명,역ID,지하철역,04:00:00~04:59:59 승차,04:00:00~04:59:59 하차,05:00:00~05:59:59 승차,05:00:00~05:59:59 하차,06:00:00~06:59:59 승차,06:00:00~06:59:59 하차,07:00:00~07:59:59 승차,...,23:00:00~23:59:59 승차,23:00:00~23:59:59 하차,00:00:00~00:59:59 승차,00:00:00~00:59:59 하차,01:00:00~01:59:59 승차,01:00:00~01:59:59 하차,02:00:00~02:59:59 승차,02:00:00~02:59:59 하차,03:00:00~03:59:59 승차,03:00:00~03:59:59 하차
1,1호선,150.0,서울역,686,25,8354,8884,13709,59387,43653,...,27169,15365,2922,3882,0,249,0,0,0,0
2,1호선,151.0,시청,90,2,2209,5148,4267,26852,8516,...,10115,4609,588,1325,0,44,0,0,0,0
3,1호선,152.0,종각,166,8,3937,5736,4593,30251,6932,...,27626,4695,1724,1489,0,0,0,0,0,0
4,1호선,153.0,종로3가,181,14,4155,2562,3762,12688,6021,...,17041,5336,1532,2489,0,43,0,0,0,0
5,1호선,154.0,종로5가,58,1,2084,3639,3293,16308,6024,...,6554,4121,401,1456,0,1,0,0,0,0


In [79]:
import re
from pandas.api.types import is_string_dtype

for n in subway_copy.columns:
    if '~' in n:
        if is_string_dtype(subway_copy[n]):
            subway_copy[n] = subway_copy[n].str.replace(',','').astype('int64')

subway_copy['역ID'] = subway_copy['역ID'].astype('int64')

In [40]:
subway_copy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 622 entries, 1 to 622
Data columns (total 53 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   호선명                   622 non-null    object
 1   역ID                   622 non-null    int64 
 2   지하철역                  622 non-null    object
 3   04:00:00~04:59:59 승차  622 non-null    int64 
 4   04:00:00~04:59:59 하차  622 non-null    int64 
 5   05:00:00~05:59:59 승차  622 non-null    int64 
 6   05:00:00~05:59:59 하차  622 non-null    int64 
 7   06:00:00~06:59:59 승차  622 non-null    int64 
 8   06:00:00~06:59:59 하차  622 non-null    int64 
 9   07:00:00~07:59:59 승차  622 non-null    int64 
 10  07:00:00~07:59:59 하차  622 non-null    int64 
 11  08:00:00~08:59:59 승차  622 non-null    int64 
 12  08:00:00~08:59:59 하차  622 non-null    int64 
 13  09:00:00~09:59:59 승차  622 non-null    int64 
 14  09:00:00~09:59:59 하차  622 non-null    int64 
 15  10:00:00~10:59:59 승차  622 non-null    in

In [70]:
subway_copy['출근시간 승차인원'] = subway_copy['07:00:00~07:59:59 승차'] + subway_copy['08:00:00~08:59:59 승차'] + subway_copy['08:00:00~08:59:59 승차']

In [71]:
subway_copy['출근시간 승차인원']

1      189459
2       32936
3       30252
4       26317
5       27764
        ...  
618     52765
619     18167
620     53487
621    122105
622     18910
Name: 출근시간 승차인원, Length: 622, dtype: int64

In [72]:
subway_copy.sort_values(by='출근시간 승차인원', ascending=False).iloc[0]['지하철역']

'신림'

In [74]:
subway_copy.sort_values(by='출근시간 승차인원', ascending=False).iloc[0]['출근시간 승차인원']

709240

In [75]:
subway_copy.sort_values(by='15:00:00~15:59:59 승차', ascending=False).iloc[0]['지하철역']

'강남'

In [76]:
subway_copy.sort_values(by='15:00:00~15:59:59 하차', ascending=False).iloc[0]['지하철역']

'홍대입구'

In [77]:
subway_copy.sort_values(by='16:00:00~16:59:59 승차', ascending=False).iloc[0]['지하철역']

'강남'

In [78]:
subway_copy.sort_values(by='16:00:00~16:59:59 하차', ascending=False).iloc[0]['지하철역']

'홍대입구'