# 🚆Train_timetable
지하철 시간표 데이터 전처리

### TODO
- [X] 서울시 공공데이터를 통해 가능한 호선 데이터 불러오기
- [ ] 가공하기
  - [X] 기본 가공
- [ ] 부족한 데이터셋 구하기
  - [ ] 같은 형식으로 변형

### 0. import

In [221]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from IPython.display import set_matplotlib_formats
import koreanize_matplotlib
import folium
import urllib.request
import json
import time

### 1. 서울시 공공데이터를 통해 가능한 호선 데이터 불러오기

In [222]:
# 작업했던 역번호 데이터 읽어오기
station = pd.read_csv('result_train_station.csv')

station

Unnamed: 0.1,Unnamed: 0,역사명,노선명,주소,위도,경도,역번호,외부코드
0,0,계양,인천1호선,인천광역시 계양구 다남로 24,37.571449,126.735780,3110,I110
1,1,귤현,인천1호선,인천광역시 계양구 장제로 1136,37.566379,126.742654,3111,I111
2,2,박촌,인천1호선,인천광역시 계양구 장제로 992,37.553703,126.745077,3112,I112
3,3,임학,인천1호선,인천광역시 계양구 장제로 875,37.545059,126.738665,3113,I113
4,4,계산,인천1호선,인천광역시 계양구 경명대로 1089,37.543238,126.728128,3114,I114
...,...,...,...,...,...,...,...,...
764,764,옥수,경의중앙선,서울시 성동구 동호로 지하21(옥수동),37.540446,127.018672,,
765,765,응봉,경의중앙선,서울시 성동구 고산자로 123(응봉동),37.549946,127.034538,,
766,766,까치산,2호선,서울특별시 강서구 강서로 54,37.531394,126.846987,0200,
767,767,도라산,경의중앙선,경기도 파주시 장단면 희망로 307,37.898307,126.709193,,


In [223]:
# key 가져오기
from dotenv import load_dotenv
import os

load_dotenv()

url = os.environ.get('train_seoul_timetable_url')

In [224]:
# 함수 정의
# 호선명을 입력받아 해당하는 
def getTimetableByRouteName(station, err) :
    result = pd.DataFrame({
        'LINE_NUM': [],
        'FR_CODE': [],
        'STATION_CD': [],
        'STATION_NM': [],
        'TRAIN_NO': [],
        'ARRIVETIME': [],
        'LEFTTIME': [],
        'ORIGINSTATION': [],
        'DESTSTATION': [],
        'SUBWAYSNAME': [],
        'SUBWAYENAME': [],
        'WEEK_TAG': [], 
        'INOUT_TAG': [], 
        'FL_FLAG': [], 
        'DESTSTATION2': [], 
        'EXPRESS_YN': [], 
        'BRANCH_LINE': []
    })
    
    codes = list(station["역번호"])
    
    for code in codes:
        for week in range(1, 4): # 요일
            for inout in range(1, 3): # 상/하행
                
                response = urllib.request.urlopen(url.format(code, week, inout))
                json_str = response.read().decode("utf-8")
                json_obj = json.loads(json_str)
                
                if 'RESULT' in json_obj and json_obj['RESULT']['CODE'] == 'INFO-000':
                    # 정상
                    temp = pd.json_normalize(json_obj['row'])
                    result = pd.concat([result, temp], axis=0)
                elif 'SearchSTNTimeTableByIDService' in json_obj and json_obj['SearchSTNTimeTableByIDService']['RESULT']['CODE'] == 'INFO-000':
                    # 정상
                    temp = pd.json_normalize(
                        json_obj['SearchSTNTimeTableByIDService']['row'])
                    result = pd.concat([result, temp], axis=0)
                else :
                    # 없는 경우
                    err.append(
                        {"code": code, "err": json_obj['RESULT']['CODE']})
                
    return result

In [225]:
# 노선 개수 확인
routes = list(set(station["노선명"]))
cntRoutes = len(station["노선명"].value_counts())

errs = {}
results = {}

In [226]:
# # API 호출
# for route in routes:    
#     err = []
    
#     results[route] = getTimetableByRouteName(station[station['노선명'] == route], err)
#     errs[route] = err
    
#     time.sleep(10)

# # 시간표 엑셀로 저장 (반드시 필요, 호출 시간 오래 걸리므로)
# now_routes = ["1호선", "2호선", "3호선", "4호선", "5호선", "6호선", "7호선", "8호선", "9호선"]

# for route in now_routes:
#     results[route].to_csv("timetable/" + route + ".csv")

In [227]:
# # 에러 여부
# errs

In [228]:
# routes = sorted(routes)

# for route in routes:
#     station_cnt = len(set(station['역사명'][station['노선명'] == route]))
#     err_cnt = len(errs[route])
#     if err_cnt != 0:
#         print(route, "역 개수 :", station_cnt)
#         print(route, "오류 개수 :", err_cnt, ", 비교값 :", station_cnt * 6) # 한 역에 대해 API 6번 호출하므로
#         print()
    
# # 확인 결과, 6호선을 제외하고는 다 API가 제공하지 않는 호선임
# # 6호선 몇몇 역들에 대해서만 API 호출하면 됨

In [229]:
# for obj in errs["6호선"]:
#     print(station['역사명'][station['역번호'] == obj['code']])

# # 확인 결과, 해당 방향으로는 원래 열차가 없는 역 -> 응암루프선
# # 그래도, 역 도착 시간을 알기 위해 시간표 정보가 필요

In [230]:
# 엑셀 파일에서 불러오기
now_routes = ["1호선", "2호선", "3호선", "4호선", "5호선", "6호선", "7호선", "8호선", "9호선"]

for route in now_routes:
    results[route] = pd.read_csv("timetable/" + route + ".csv")

  results[route] = pd.read_csv("timetable/" + route + ".csv")
  results[route] = pd.read_csv("timetable/" + route + ".csv")
  results[route] = pd.read_csv("timetable/" + route + ".csv")


In [231]:
# 확인 및 데이터 형식 살펴보기 위한 파일 저장
results["1호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,01호선,124,158,청량리,K1940,18:54:00,00:00:00,1728,158,천안,청량리,1,1,,,D,
1,1,01호선,124,158,청량리,K630,11:48:00,00:00:00,1728,158,천안,청량리,1,1,,,G,
2,2,01호선,124,158,청량리,K1906,09:40:30,00:00:00,1408,158,신창,청량리,1,1,,,D,
3,3,01호선,124,158,청량리,K1908,10:16:30,00:00:00,1408,158,신창,청량리,1,1,,,D,
4,4,01호선,124,158,청량리,K1910,11:11:30,00:00:00,1728,158,천안,청량리,1,1,,,D,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81814,104,01호선,161,1812,인천,K107,15:05:30,00:00:00,1916,1812,소요산,인천,3,2,,,G,
81815,105,01호선,161,1812,인천,K105,14:56:30,00:00:00,1909,1812,양주,인천,3,2,,,G,
81816,106,01호선,161,1812,인천,K103,14:49:30,00:00:00,1701,1812,구로,인천,3,2,,,G,
81817,107,01호선,161,1812,인천,K101,14:42:30,00:00:00,1916,1812,소요산,인천,3,2,,,G,


In [232]:
results["1호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81819 entries, 0 to 81818
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     81819 non-null  int64  
 1   LINE_NUM       81819 non-null  object 
 2   FR_CODE        81819 non-null  object 
 3   STATION_CD     81819 non-null  int64  
 4   STATION_NM     81819 non-null  object 
 5   TRAIN_NO       81819 non-null  object 
 6   ARRIVETIME     81819 non-null  object 
 7   LEFTTIME       81819 non-null  object 
 8   ORIGINSTATION  81819 non-null  int64  
 9   DESTSTATION    81819 non-null  int64  
 10  SUBWAYSNAME    81819 non-null  object 
 11  SUBWAYENAME    81819 non-null  object 
 12  WEEK_TAG       81819 non-null  int64  
 13  INOUT_TAG      81819 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     81819 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [233]:
results["2호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,02호선,201,201,시청,2004,05:39:00,05:39:30,239,211,홍대입구,성수,1,1,,,G,
1,1,02호선,201,201,시청,2006,05:50:30,05:51:00,234,211,신도림,성수,1,1,,,G,
2,2,02호선,201,201,시청,2008,05:57:30,05:58:00,234,211,신도림,성수,1,1,,,G,
3,3,02호선,201,201,시청,2010,06:04:00,06:04:30,228,211,서울대입구,성수,1,1,,,G,
4,4,02호선,201,201,시청,2012,06:10:30,06:11:00,234,211,신도림,성수,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61864,101,02호선,234-4,200,까치산,5705,00:00:00,23:02:00,200,234,까치산,신도림,3,2,,,G,
61865,102,02호선,234-4,200,까치산,5707,00:00:00,23:17:00,200,234,까치산,신도림,3,2,,,G,
61866,103,02호선,234-4,200,까치산,5711,00:00:00,23:33:00,200,234,까치산,신도림,3,2,,,G,
61867,104,02호선,234-4,200,까치산,5713,00:00:00,23:47:00,200,234,까치산,신도림,3,2,,,G,


In [234]:
results["2호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61869 entries, 0 to 61868
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     61869 non-null  int64  
 1   LINE_NUM       61869 non-null  object 
 2   FR_CODE        61869 non-null  object 
 3   STATION_CD     61869 non-null  int64  
 4   STATION_NM     61869 non-null  object 
 5   TRAIN_NO       61869 non-null  int64  
 6   ARRIVETIME     61869 non-null  object 
 7   LEFTTIME       61869 non-null  object 
 8   ORIGINSTATION  61869 non-null  int64  
 9   DESTSTATION    61869 non-null  int64  
 10  SUBWAYSNAME    61869 non-null  object 
 11  SUBWAYENAME    61869 non-null  object 
 12  WEEK_TAG       61869 non-null  int64  
 13  INOUT_TAG      61869 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     61869 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [235]:
results["3호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,03호선,319,309,지축,3012,05:59:00,05:59:30,323,1958,약수,대화,1,1,,,G,
1,1,03호선,319,309,지축,3016,06:16:30,06:17:00,334,1958,도곡,대화,1,1,,,G,
2,2,03호선,319,309,지축,3022,06:32:00,06:32:30,339,1958,수서,대화,1,1,,,G,
3,3,03호선,319,309,지축,3024K,06:38:30,06:39:00,342,1958,오금,대화,1,1,,,G,
4,4,03호선,319,309,지축,3026,06:45:30,06:46:00,342,1958,오금,대화,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42768,103,03호선,309,1958,대화,3353,00:00:00,22:34:00,1958,339,대화,수서,3,2,,,G,
42769,104,03호선,309,1958,대화,3355,00:00:00,22:44:00,1958,334,대화,도곡,3,2,,,G,
42770,105,03호선,309,1958,대화,3357,00:00:00,23:03:00,1958,323,대화,약수,3,2,,,G,
42771,106,03호선,309,1958,대화,3359,00:00:00,23:20:00,1958,310,대화,구파발,3,2,,,G,


In [236]:
results["3호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42773 entries, 0 to 42772
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     42773 non-null  int64  
 1   LINE_NUM       42773 non-null  object 
 2   FR_CODE        42773 non-null  int64  
 3   STATION_CD     42773 non-null  int64  
 4   STATION_NM     42773 non-null  object 
 5   TRAIN_NO       42773 non-null  object 
 6   ARRIVETIME     42773 non-null  object 
 7   LEFTTIME       42773 non-null  object 
 8   ORIGINSTATION  42773 non-null  int64  
 9   DESTSTATION    42773 non-null  int64  
 10  SUBWAYSNAME    42773 non-null  object 
 11  SUBWAYENAME    42773 non-null  object 
 12  WEEK_TAG       42773 non-null  int64  
 13  INOUT_TAG      42773 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     42773 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [237]:
results["4호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,04호선,408,408,별내별가람,S4014,05:37:00,05:37:30,409,405,당고개,진접,1,1,,,G,
1,1,04호선,408,408,별내별가람,S4020,05:59:00,05:59:30,409,405,당고개,진접,1,1,,,G,
2,2,04호선,408,408,별내별가람,S4002,06:15:00,06:15:30,426,405,서울역,진접,1,1,,,G,
3,3,04호선,408,408,별내별가람,S4004,06:31:00,06:31:30,434,405,남태령,진접,1,1,,,G,
4,4,04호선,408,408,별내별가람,S4032,06:48:00,06:48:30,409,405,당고개,진접,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47842,106,04호선,443,1458,금정,K4685,23:11:00,23:11:30,409,1762,당고개,오이도,3,2,,,G,
47843,107,04호선,443,1458,금정,K4687,23:27:00,23:27:30,409,1762,당고개,오이도,3,2,,,G,
47844,108,04호선,443,1458,금정,K4329,23:40:00,23:40:30,409,1759,당고개,안산,3,2,,,G,
47845,109,04호선,443,1458,금정,K4331,24:03:00,24:03:30,409,1759,당고개,안산,3,2,,,G,


In [238]:
results["4호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47847 entries, 0 to 47846
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     47847 non-null  int64  
 1   LINE_NUM       47847 non-null  object 
 2   FR_CODE        47847 non-null  int64  
 3   STATION_CD     47847 non-null  int64  
 4   STATION_NM     47847 non-null  object 
 5   TRAIN_NO       47847 non-null  object 
 6   ARRIVETIME     47847 non-null  object 
 7   LEFTTIME       47847 non-null  object 
 8   ORIGINSTATION  47847 non-null  int64  
 9   DESTSTATION    47847 non-null  int64  
 10  SUBWAYSNAME    47847 non-null  object 
 11  SUBWAYENAME    47847 non-null  object 
 12  WEEK_TAG       47847 non-null  int64  
 13  INOUT_TAG      47847 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     47847 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [239]:
results["5호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,05호선,510,2511,방화,5620,17:44:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
1,1,05호선,510,2511,방화,5618,17:31:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
2,2,05호선,510,2511,방화,5616,17:18:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
3,3,05호선,510,2511,방화,5614,17:05:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
4,4,05호선,510,2511,방화,5612,16:52:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49957,81,05호선,P555,2561,마천,5533,09:21:30,00:00:00,2511,2561,방화,마천,3,2,,,G,
49958,82,05호선,P555,2561,마천,5531,09:08:30,00:00:00,2511,2561,방화,마천,3,2,,,G,
49959,83,05호선,P555,2561,마천,5529,08:55:30,00:00:00,2511,2561,방화,마천,3,2,,,G,
49960,84,05호선,P555,2561,마천,5527,08:42:30,00:00:00,2511,2561,방화,마천,3,2,,,G,


In [240]:
results["5호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49962 entries, 0 to 49961
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     49962 non-null  int64  
 1   LINE_NUM       49962 non-null  object 
 2   FR_CODE        49962 non-null  object 
 3   STATION_CD     49962 non-null  int64  
 4   STATION_NM     49962 non-null  object 
 5   TRAIN_NO       49962 non-null  int64  
 6   ARRIVETIME     49962 non-null  object 
 7   LEFTTIME       49962 non-null  object 
 8   ORIGINSTATION  49962 non-null  int64  
 9   DESTSTATION    49962 non-null  int64  
 10  SUBWAYSNAME    49962 non-null  object 
 11  SUBWAYENAME    49962 non-null  object 
 12  WEEK_TAG       49962 non-null  int64  
 13  INOUT_TAG      49962 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     49962 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [241]:
results["6호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,06호선,610,2611,응암,6316,23:29:30,00:00:00,2649,2611,신내,응암,1,1,,,G,
1,1,06호선,610,2611,응암,6314,23:20:00,00:00:00,2648,2611,봉화산,응암,1,1,,,G,
2,2,06호선,610,2611,응암,6312,23:10:00,00:00:00,2648,2611,봉화산,응암,1,1,,,G,
3,3,06호선,610,2611,응암,6310,23:01:00,00:00:00,2649,2611,신내,응암,1,1,,,G,
4,4,06호선,610,2611,응암,6308,22:52:00,00:00:00,2648,2611,봉화산,응암,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30496,41,06호선,648,2649,신내,6011,06:52:00,00:00:00,2611,2649,응암,신내,3,2,,,G,
30497,42,06호선,648,2649,신내,6007,06:30:20,00:00:00,2617,2649,새절,신내,3,2,,,G,
30498,43,06호선,648,2649,신내,6005,06:16:00,00:00:00,2627,2649,공덕,신내,3,2,,,G,
30499,44,06호선,648,2649,신내,6003,06:03:10,00:00:00,2632,2649,한강진,신내,3,2,,,G,


In [242]:
results["6호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30501 entries, 0 to 30500
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     30501 non-null  int64  
 1   LINE_NUM       30501 non-null  object 
 2   FR_CODE        30501 non-null  int64  
 3   STATION_CD     30501 non-null  int64  
 4   STATION_NM     30501 non-null  object 
 5   TRAIN_NO       30501 non-null  int64  
 6   ARRIVETIME     30501 non-null  object 
 7   LEFTTIME       30501 non-null  object 
 8   ORIGINSTATION  30501 non-null  int64  
 9   DESTSTATION    30501 non-null  int64  
 10  SUBWAYSNAME    30501 non-null  object 
 11  SUBWAYENAME    30501 non-null  object 
 12  WEEK_TAG       30501 non-null  int64  
 13  INOUT_TAG      30501 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     30501 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [243]:
results["7호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,07호선,751,3753,까치울,7016,05:48:00,05:48:30,3763,2711,석남,장암,1,1,,,G,
1,1,07호선,751,3753,까치울,7020,06:03:00,06:03:30,3763,2711,석남,장암,1,1,,,G,
2,2,07호선,751,3753,까치울,7024,06:16:00,06:16:30,3763,2712,석남,도봉산,1,1,,,G,
3,3,07호선,751,3753,까치울,7028,06:29:00,06:29:30,3763,2712,석남,도봉산,1,1,,,G,
4,4,07호선,751,3753,까치울,7032,06:40:30,06:41:00,3763,2712,석남,도봉산,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49436,157,07호선,750,2752,온수,7301,22:46:30,22:47:00,2711,3763,장암,석남,3,2,,,G,
49437,158,07호선,750,2752,온수,7305,23:00:00,23:00:30,2712,3763,도봉산,석남,3,2,,,G,
49438,159,07호선,750,2752,온수,7309,23:15:30,23:16:00,2712,3763,도봉산,석남,3,2,,,G,
49439,160,07호선,750,2752,온수,7313,23:31:30,23:32:00,2712,3763,도봉산,석남,3,2,,,G,


In [244]:
results["7호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49441 entries, 0 to 49440
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     49441 non-null  int64  
 1   LINE_NUM       49441 non-null  object 
 2   FR_CODE        49441 non-null  int64  
 3   STATION_CD     49441 non-null  int64  
 4   STATION_NM     49441 non-null  object 
 5   TRAIN_NO       49441 non-null  int64  
 6   ARRIVETIME     49441 non-null  object 
 7   LEFTTIME       49441 non-null  object 
 8   ORIGINSTATION  49441 non-null  int64  
 9   DESTSTATION    49441 non-null  int64  
 10  SUBWAYSNAME    49441 non-null  object 
 11  SUBWAYENAME    49441 non-null  object 
 12  WEEK_TAG       49441 non-null  int64  
 13  INOUT_TAG      49441 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     49441 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [245]:
results["8호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,08호선,810,2811,암사,8200,18:18:00,00:00:00,2827,2811,모란,암사,1,1,,,G,
1,1,08호선,810,2811,암사,8198,18:13:00,00:00:00,2827,2811,모란,암사,1,1,,,G,
2,2,08호선,810,2811,암사,8196,18:08:00,00:00:00,2827,2811,모란,암사,1,1,,,G,
3,3,08호선,810,2811,암사,8194,18:03:00,00:00:00,2827,2811,모란,암사,1,1,,,G,
4,4,08호선,810,2811,암사,8192,17:57:00,00:00:00,2827,2811,모란,암사,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14192,120,08호선,827,2827,모란,8051,09:39:00,00:00:00,2811,2827,암사,모란,3,2,,,G,
14193,121,08호선,827,2827,모란,8049,09:30:30,00:00:00,2811,2827,암사,모란,3,2,,,G,
14194,122,08호선,827,2827,모란,8047,09:22:00,00:00:00,2811,2827,암사,모란,3,2,,,G,
14195,123,08호선,827,2827,모란,8045,09:13:30,00:00:00,2811,2827,암사,모란,3,2,,,G,


In [246]:
results["8호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14197 entries, 0 to 14196
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     14197 non-null  int64  
 1   LINE_NUM       14197 non-null  object 
 2   FR_CODE        14197 non-null  int64  
 3   STATION_CD     14197 non-null  int64  
 4   STATION_NM     14197 non-null  object 
 5   TRAIN_NO       14197 non-null  int64  
 6   ARRIVETIME     14197 non-null  object 
 7   LEFTTIME       14197 non-null  object 
 8   ORIGINSTATION  14197 non-null  int64  
 9   DESTSTATION    14197 non-null  int64  
 10  SUBWAYSNAME    14197 non-null  object 
 11  SUBWAYENAME    14197 non-null  object 
 12  WEEK_TAG       14197 non-null  int64  
 13  INOUT_TAG      14197 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     14197 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

In [247]:
results["9호선"]

Unnamed: 0.1,Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,0,09호선,901,4101,개화,C9014,00:00:00,05:30:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
1,1,09호선,901,4101,개화,C9016,00:00:00,05:42:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
2,2,09호선,901,4101,개화,C9018,00:00:00,05:54:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
3,3,09호선,901,4101,개화,C9020,00:00:00,06:06:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
4,4,09호선,901,4101,개화,C9022,00:00:00,06:14:15,4101,4138,개화,중앙보훈병원,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32889,91,09호선,926,4126,언주,C9191,23:08:50,23:09:15,4138,4101,중앙보훈병원,개화,3,2,,,G,
32890,92,09호선,926,4126,언주,C9193,23:23:50,23:24:15,4138,4110,중앙보훈병원,염창,3,2,,,G,
32891,93,09호선,926,4126,언주,C9195,23:34:45,23:35:10,4138,4115,중앙보훈병원,여의도,3,2,,,G,
32892,94,09호선,926,4126,언주,C9197,23:48:45,23:49:10,4138,4120,중앙보훈병원,동작,3,2,,,G,


In [248]:
results["9호선"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32894 entries, 0 to 32893
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     32894 non-null  int64  
 1   LINE_NUM       32894 non-null  object 
 2   FR_CODE        32894 non-null  int64  
 3   STATION_CD     32894 non-null  int64  
 4   STATION_NM     32894 non-null  object 
 5   TRAIN_NO       32894 non-null  object 
 6   ARRIVETIME     32894 non-null  object 
 7   LEFTTIME       32894 non-null  object 
 8   ORIGINSTATION  32894 non-null  int64  
 9   DESTSTATION    32894 non-null  int64  
 10  SUBWAYSNAME    32894 non-null  object 
 11  SUBWAYENAME    32894 non-null  object 
 12  WEEK_TAG       32894 non-null  int64  
 13  INOUT_TAG      32894 non-null  int64  
 14  FL_FLAG        0 non-null      float64
 15  DESTSTATION2   0 non-null      float64
 16  EXPRESS_YN     32894 non-null  object 
 17  BRANCH_LINE    0 non-null      float64
dtypes: flo

### 2. 가공하기

In [249]:
for route in now_routes:
    # 컬럼명 변경
    results[route].rename(columns={
        "LINE_NUM": "노선명", 
        "FR_CODE": "외부코드", 
        "STATION_CD": "역번호",
        "STATION_NM": "역사명",
        "TRAIN_NO": "열차번호", 
        "ARRIVETIME": "도착시간", 
        "LEFTTIME": "출발시간", 
        "ORIGINSTATION": "출발역번호",
        "DESTSTATION": "도착역번호",
        "SUBWAYSNAME": "출발역사명",
        "SUBWAYENAME": "도착역사명",
        "WEEK_TAG": "요일",
        "INOUT_TAG": "방향",
        "EXPRESS_YN": "급행선"
    }, inplace=True)
    
    # 노선명 앞의 0 제거
    results[route]['노선명'] = results[route]['노선명'].str.replace(
        pat=r'^0', repl=r'', regex=True)
    
    # 필요없는 컬럼 삭제
    results[route].drop(columns=["Unnamed: 0", "FL_FLAG",
                        "DESTSTATION2", "BRANCH_LINE"], inplace=True)
    
    # 데이터 타입 통일
    results[route] = results[route].astype({'외부코드': 'str', '열차번호': 'str'})
    
    # 시간 데이터
    # 1. 도착시간 없이 출발시간만 있는 경우, 출발시간 = 도착시간
    results[route]["도착시간"][(results[route]["도착시간"] == "00:00:00") &
                           (results[route]["출발시간"] != "00:00:00")] = results[route]["출발시간"][(results[route]["도착시간"] == "00:00:00") &
                                                                                            (results[route]["출발시간"] != "00:00:00")]
    # 2. 출발시간 삭제
    results[route].drop(columns=["출발시간"], inplace=True)
    # 3. 초단위 버림
    results[route]["도착시간"] = results[route]["도착시간"].str.slice(start=0, stop=5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results[route]["도착시간"][(results[route]["도착시간"] == "00:00:00") &
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results[route]["도착시간"][(results[route]["도착시간"] == "00:00:00") &
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results[route]["도착시간"][(results[route]["도착시간"] == "00:00:00") &
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning

In [250]:
results["5호선"]

Unnamed: 0,노선명,외부코드,역번호,역사명,열차번호,도착시간,출발역번호,도착역번호,출발역사명,도착역사명,요일,방향,급행선
0,5호선,510,2511,방화,5620,17:44,2561,2511,마천,방화,1,1,G
1,5호선,510,2511,방화,5618,17:31,2561,2511,마천,방화,1,1,G
2,5호선,510,2511,방화,5616,17:18,2561,2511,마천,방화,1,1,G
3,5호선,510,2511,방화,5614,17:05,2561,2511,마천,방화,1,1,G
4,5호선,510,2511,방화,5612,16:52,2561,2511,마천,방화,1,1,G
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49957,5호선,P555,2561,마천,5533,09:21,2511,2561,방화,마천,3,2,G
49958,5호선,P555,2561,마천,5531,09:08,2511,2561,방화,마천,3,2,G
49959,5호선,P555,2561,마천,5529,08:55,2511,2561,방화,마천,3,2,G
49960,5호선,P555,2561,마천,5527,08:42,2511,2561,방화,마천,3,2,G


In [253]:
# 테이블 저장
now_routes = ["1호선", "2호선", "3호선", "4호선", "5호선", "6호선", "7호선", "8호선", "9호선"]

for route in now_routes:
    results[route].to_csv("timetable/result/" + route + ".csv")