# 🚆Train_timetable
지하철 시간표 데이터 전처리

### TODO
- [X] 서울시 공공데이터를 통해 가능한 호선 데이터 불러오기
- [ ] 부족한 데이터셋 구하기

### 0. import

In [14]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from IPython.display import set_matplotlib_formats
import koreanize_matplotlib
import folium
import urllib.request
import json
import time

### 1. 서울시 공공데이터를 통해 가능한 호선 데이터 불러오기

In [9]:
# Load the station-number table produced by earlier work.
# Station numbers are identifiers such as '0200', not quantities: read them as
# text so leading zeros survive and later string comparisons against the
# codes stay reliable.
station = pd.read_csv('result_train_station.csv', dtype={'역번호': str})

station

Unnamed: 0.1,Unnamed: 0,역사명,노선명,주소,위도,경도,역번호,외부코드
0,0,계양,인천1호선,인천광역시 계양구 다남로 24,37.571449,126.735780,3110,I110
1,1,귤현,인천1호선,인천광역시 계양구 장제로 1136,37.566379,126.742654,3111,I111
2,2,박촌,인천1호선,인천광역시 계양구 장제로 992,37.553703,126.745077,3112,I112
3,3,임학,인천1호선,인천광역시 계양구 장제로 875,37.545059,126.738665,3113,I113
4,4,계산,인천1호선,인천광역시 계양구 경명대로 1089,37.543238,126.728128,3114,I114
...,...,...,...,...,...,...,...,...
764,764,옥수,경의중앙선,서울시 성동구 동호로 지하21(옥수동),37.540446,127.018672,,
765,765,응봉,경의중앙선,서울시 성동구 고산자로 123(응봉동),37.549946,127.034538,,
766,766,까치산,2호선,서울특별시 강서구 강서로 54,37.531394,126.846987,0200,
767,767,도라산,경의중앙선,경기도 파주시 장단면 희망로 307,37.898307,126.709193,,


In [10]:
# Fetch the key: load the .env file, then read the timetable API URL
# template from the environment (None when the variable is not set).
import os

from dotenv import load_dotenv

load_dotenv()

url = os.getenv('train_seoul_timetable_url')

In [11]:
# Function definitions
# Collect the timetable of every station in the given station table
# (the caller passes the rows of a single line).
_TIMETABLE_COLUMNS = (
    'LINE_NUM',
    'FR_CODE',
    'STATION_CD',
    'STATION_NM',
    'TRAIN_NO',
    'ARRIVETIME',
    'LEFTTIME',
    'ORIGINSTATION',
    'DESTSTATION',
    'SUBWAYSNAME',
    'SUBWAYENAME',
    'WEEK_TAG',
    'INOUT_TAG',
    'FL_FLAG',
    'DESTSTATION2',
    'EXPRESS_YN',
    'BRANCH_LINE',
)


def _timetable_rows_or_error(json_obj):
    """Split one decoded API payload into ``(rows, error_code)``.

    Exactly one element of the pair is meaningful: ``rows`` is the payload's
    ``row`` list when a result code of ``INFO-000`` is found (either at the
    top level or nested under ``SearchSTNTimeTableByIDService``); otherwise
    ``rows`` is ``None`` and ``error_code`` is the reported result code, or
    ``None`` when the payload carries no result code at all.
    """
    top_result = json_obj.get('RESULT') or {}
    if top_result.get('CODE') == 'INFO-000':
        return json_obj['row'], None

    service = json_obj.get('SearchSTNTimeTableByIDService') or {}
    service_result = service.get('RESULT') or {}
    if service_result.get('CODE') == 'INFO-000':
        return service['row'], None

    # Prefer the top-level code; fall back to the nested one so a failed
    # payload wrapped in the service key is recorded instead of raising
    # KeyError on a missing top-level 'RESULT'.
    return None, top_result.get('CODE', service_result.get('CODE'))


def getTimetableByRouteName(station, err) :
    """Download the timetable rows for every station in *station*.

    For each value of the ``역번호`` column, the module-level ``url`` format
    string is filled with ``(station code, week, inout)`` and requested for
    week tags 1-3 (day type) and in/out tags 1-2 (up/down direction), i.e.
    six requests per station. Station codes are used exactly as stored; rows
    are not filtered beforehand.

    Args:
        station: DataFrame with a ``역번호`` (station number) column.
        err: list that is appended to in place; each failed request adds
            ``{"code": <station code>, "err": <API result code>}``.

    Returns:
        DataFrame holding all downloaded rows. The fixed timetable columns
        come first, and each response keeps its own row index (the index is
        not reset). Empty, with just the fixed columns, when nothing was
        downloaded.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.loads`` raise; network
        and decoding failures are not caught here.
    """
    # Empty template: fixes the column set and order even when no request
    # succeeds.
    result = pd.DataFrame({column: [] for column in _TIMETABLE_COLUMNS})

    frames = []
    for code in station["역번호"]:
        for week in range(1, 4):  # day type
            for inout in range(1, 3):  # up / down direction
                # The context manager closes the HTTP response promptly.
                with urllib.request.urlopen(url.format(code, week, inout)) as response:
                    json_obj = json.loads(response.read().decode("utf-8"))

                rows, error_code = _timetable_rows_or_error(json_obj)
                if rows is None:
                    # No timetable available for this combination.
                    err.append({"code": code, "err": error_code})
                else:
                    frames.append(pd.json_normalize(rows))

    if not frames:
        return result

    # Concatenate once instead of growing the frame inside the loop, which
    # re-copied all accumulated rows on every request.
    return pd.concat([result, *frames], axis=0)

In [12]:
# Check how many lines (routes) there are, and prepare the per-route
# accumulators filled by the API calls.
route_names = station["노선명"]
routes = list(set(route_names))
cntRoutes = route_names.nunique()

errs, results = {}, {}

In [16]:
# Call the API: one pass per route, collecting both the timetable and the
# failed requests, with a 10-second pause after each route.
for route in routes:
    err = []
    stations_on_route = station[station['노선명'] == route]

    results[route] = getTimetableByRouteName(stations_on_route, err)
    errs[route] = err

    time.sleep(10)

In [24]:
# Check whether any errors occurred: show the failed requests recorded per route.
errs

{'에버라인선': [{'code': '4501', 'err': 'INFO-200'},
  {'code': '4501', 'err': 'INFO-200'},
  {'code': '4501', 'err': 'INFO-200'},
  {'code': '4501', 'err': 'INFO-200'},
  {'code': '4501', 'err': 'INFO-200'},
  {'code': '4501', 'err': 'INFO-200'},
  {'code': '4502', 'err': 'INFO-200'},
  {'code': '4502', 'err': 'INFO-200'},
  {'code': '4502', 'err': 'INFO-200'},
  {'code': '4502', 'err': 'INFO-200'},
  {'code': '4502', 'err': 'INFO-200'},
  {'code': '4502', 'err': 'INFO-200'},
  {'code': '4503', 'err': 'INFO-200'},
  {'code': '4503', 'err': 'INFO-200'},
  {'code': '4503', 'err': 'INFO-200'},
  {'code': '4503', 'err': 'INFO-200'},
  {'code': '4503', 'err': 'INFO-200'},
  {'code': '4503', 'err': 'INFO-200'},
  {'code': '4504', 'err': 'INFO-200'},
  {'code': '4504', 'err': 'INFO-200'},
  {'code': '4504', 'err': 'INFO-200'},
  {'code': '4504', 'err': 'INFO-200'},
  {'code': '4504', 'err': 'INFO-200'},
  {'code': '4504', 'err': 'INFO-200'},
  {'code': '4505', 'err': 'INFO-200'},
  {'code': '4505

In [47]:
routes = sorted(routes)

# For every route that had failures, print its station count next to its
# error count. The comparison value is station count * 6 because each
# station triggers 6 API calls.
for route in routes:
    on_route = station['노선명'] == route
    station_cnt = len(set(station.loc[on_route, '역사명']))
    err_cnt = len(errs[route])
    if err_cnt == 0:
        continue

    print(route, "역 개수 :", station_cnt)
    print(route, "오류 개수 :", err_cnt, ", 비교값 :", station_cnt * 6)
    print()

# Finding: apart from line 6, every route listed is one the API does not serve.
# Only a few line-6 stations need their API calls looked at.

6호선 역 개수 : 39
6호선 오류 개수 : 15 , 비교값 : 234

경강선 역 개수 : 11
경강선 오류 개수 : 66 , 비교값 : 66

경의중앙선 역 개수 : 58
경의중앙선 오류 개수 : 348 , 비교값 : 348

경춘선 역 개수 : 25
경춘선 오류 개수 : 150 , 비교값 : 150

공항철도선 역 개수 : 14
공항철도선 오류 개수 : 84 , 비교값 : 84

김포골드라인 역 개수 : 10
김포골드라인 오류 개수 : 60 , 비교값 : 60

서해선 역 개수 : 12
서해선 오류 개수 : 72 , 비교값 : 72

수인분당선 역 개수 : 63
수인분당선 오류 개수 : 378 , 비교값 : 378

신림선 역 개수 : 11
신림선 오류 개수 : 66 , 비교값 : 66

신분당선 역 개수 : 16
신분당선 오류 개수 : 96 , 비교값 : 96

에버라인선 역 개수 : 15
에버라인선 오류 개수 : 90 , 비교값 : 90

우이신설선 역 개수 : 13
우이신설선 오류 개수 : 78 , 비교값 : 78

의정부선 역 개수 : 15
의정부선 오류 개수 : 90 , 비교값 : 90

인천1호선 역 개수 : 30
인천1호선 오류 개수 : 180 , 비교값 : 180

인천2호선 역 개수 : 27
인천2호선 오류 개수 : 162 , 비교값 : 162



In [39]:
# Print the station name behind each failed line-6 request.
for obj in errs["6호선"]:
    code_matches = station['역번호'] == obj['code']
    print(station.loc[code_matches, '역사명'])

# Finding: these stations never have trains in that direction -> Eungam loop.
# Timetable information is still needed to know the arrival times at them.

327    역촌
Name: 역사명, dtype: object
327    역촌
Name: 역사명, dtype: object
327    역촌
Name: 역사명, dtype: object
328    불광
Name: 역사명, dtype: object
328    불광
Name: 역사명, dtype: object
328    불광
Name: 역사명, dtype: object
329    독바위
Name: 역사명, dtype: object
329    독바위
Name: 역사명, dtype: object
329    독바위
Name: 역사명, dtype: object
330    연신내
Name: 역사명, dtype: object
330    연신내
Name: 역사명, dtype: object
330    연신내
Name: 역사명, dtype: object
331    구산
Name: 역사명, dtype: object
331    구산
Name: 역사명, dtype: object
331    구산
Name: 역사명, dtype: object


In [51]:
# Save the line-1 timetable and display it to inspect the data format.
import os

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("timetable", exist_ok=True)
results["1호선"].to_csv("timetable/1호선.csv")

# Keep the display expression last: a notebook cell only renders its final
# expression.
results["1호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,01호선,124,0158,청량리,K1908,10:16:30,00:00:00,1408,0158,신창,청량리,1,1,,,D,
1,01호선,124,0158,청량리,K1930,16:38:00,00:00:00,1728,0158,천안,청량리,1,1,,,D,
2,01호선,124,0158,청량리,K1932,17:00:00,00:00:00,1728,0158,천안,청량리,1,1,,,D,
3,01호선,124,0158,청량리,K1934,17:37:00,00:00:00,1728,0158,천안,청량리,1,1,,,D,
4,01호선,124,0158,청량리,K1936,17:58:00,00:00:00,1728,0158,천안,청량리,1,1,,,D,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,01호선,161,1812,인천,S41,09:27:00,00:00:00,0159,1812,동묘앞,인천,3,2,,,G,
105,01호선,161,1812,인천,S33,08:50:30,00:00:00,1909,1812,양주,인천,3,2,,,G,
106,01호선,161,1812,인천,S29,08:26:30,00:00:00,1906,1812,의정부,인천,3,2,,,G,
107,01호선,161,1812,인천,S23,07:54:00,00:00:00,0159,1812,동묘앞,인천,3,2,,,G,


In [19]:
# Display the timetable collected for line 2.
results["2호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,02호선,201,0201,시청,2004,05:39:00,05:39:30,0239,0211,홍대입구,성수,1,1,,,G,
1,02호선,201,0201,시청,2006,05:50:30,05:51:00,0234,0211,신도림,성수,1,1,,,G,
2,02호선,201,0201,시청,2008,05:57:30,05:58:00,0234,0211,신도림,성수,1,1,,,G,
3,02호선,201,0201,시청,2010,06:04:00,06:04:30,0228,0211,서울대입구,성수,1,1,,,G,
4,02호선,201,0201,시청,2012,06:10:30,06:11:00,0234,0211,신도림,성수,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,02호선,234-4,0200,까치산,5705,00:00:00,23:02:00,0200,0234,까치산,신도림,3,2,,,G,
102,02호선,234-4,0200,까치산,5707,00:00:00,23:17:00,0200,0234,까치산,신도림,3,2,,,G,
103,02호선,234-4,0200,까치산,5711,00:00:00,23:33:00,0200,0234,까치산,신도림,3,2,,,G,
104,02호선,234-4,0200,까치산,5713,00:00:00,23:47:00,0200,0234,까치산,신도림,3,2,,,G,


In [40]:
# Display the timetable collected for line 3.
results["3호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,03호선,319,0309,지축,3012,05:59:00,05:59:30,0323,1958,약수,대화,1,1,,,G,
1,03호선,319,0309,지축,3016,06:16:30,06:17:00,0334,1958,도곡,대화,1,1,,,G,
2,03호선,319,0309,지축,3022,06:32:00,06:32:30,0339,1958,수서,대화,1,1,,,G,
3,03호선,319,0309,지축,3024K,06:38:30,06:39:00,0342,1958,오금,대화,1,1,,,G,
4,03호선,319,0309,지축,3026,06:45:30,06:46:00,0342,1958,오금,대화,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,03호선,309,1958,대화,3353,00:00:00,22:34:00,1958,0339,대화,수서,3,2,,,G,
104,03호선,309,1958,대화,3355,00:00:00,22:44:00,1958,0334,대화,도곡,3,2,,,G,
105,03호선,309,1958,대화,3357,00:00:00,23:03:00,1958,0323,대화,약수,3,2,,,G,
106,03호선,309,1958,대화,3359,00:00:00,23:20:00,1958,0310,대화,구파발,3,2,,,G,


In [41]:
# Display the timetable collected for line 4.
results["4호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,04호선,408,0408,별내별가람,S4014,05:37:00,05:37:30,0409,0405,당고개,진접,1,1,,,G,
1,04호선,408,0408,별내별가람,S4020,05:59:00,05:59:30,0409,0405,당고개,진접,1,1,,,G,
2,04호선,408,0408,별내별가람,S4002,06:15:00,06:15:30,0426,0405,서울역,진접,1,1,,,G,
3,04호선,408,0408,별내별가람,S4004,06:31:00,06:31:30,0434,0405,남태령,진접,1,1,,,G,
4,04호선,408,0408,별내별가람,S4032,06:48:00,06:48:30,0409,0405,당고개,진접,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,04호선,443,1458,금정,K4685,23:11:00,23:11:30,0409,1762,당고개,오이도,3,2,,,G,
107,04호선,443,1458,금정,K4687,23:27:00,23:27:30,0409,1762,당고개,오이도,3,2,,,G,
108,04호선,443,1458,금정,K4329,23:40:00,23:40:30,0409,1759,당고개,안산,3,2,,,G,
109,04호선,443,1458,금정,K4331,24:03:00,24:03:30,0409,1759,당고개,안산,3,2,,,G,


In [42]:
# Display the timetable collected for line 5.
results["5호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,05호선,510,2511,방화,5690,24:45:30,00:00:00,2561,2511,마천,방화,1,1,,,G,
1,05호선,510,2511,방화,5688,24:20:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
2,05호선,510,2511,방화,5686,23:58:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
3,05호선,510,2511,방화,5684,23:42:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
4,05호선,510,2511,방화,5682,23:26:00,00:00:00,2561,2511,마천,방화,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,05호선,P555,2561,마천,5611,17:35:30,00:00:00,2511,2561,방화,마천,3,2,,,G,
82,05호선,P555,2561,마천,5609,17:22:30,00:00:00,2511,2561,방화,마천,3,2,,,G,
83,05호선,P555,2561,마천,5607,17:09:30,00:00:00,2511,2561,방화,마천,3,2,,,G,
84,05호선,P555,2561,마천,5605,16:56:30,00:00:00,2511,2561,방화,마천,3,2,,,G,


In [43]:
# Display the timetable collected for line 6.
results["6호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,06호선,610,2611,응암,6330,24:39:50,00:00:00,2649,2611,신내,응암,1,1,,,G,
1,06호선,610,2611,응암,6328,24:25:00,00:00:00,2649,2611,신내,응암,1,1,,,G,
2,06호선,610,2611,응암,6326,24:16:00,00:00:00,2648,2611,봉화산,응암,1,1,,,G,
3,06호선,610,2611,응암,6324,24:07:00,00:00:00,2649,2611,신내,응암,1,1,,,G,
4,06호선,610,2611,응암,6322,23:57:00,00:00:00,2648,2611,봉화산,응암,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41,06호선,648,2649,신내,6011,06:52:00,00:00:00,2611,2649,응암,신내,3,2,,,G,
42,06호선,648,2649,신내,6007,06:30:20,00:00:00,2617,2649,새절,신내,3,2,,,G,
43,06호선,648,2649,신내,6005,06:16:00,00:00:00,2627,2649,공덕,신내,3,2,,,G,
44,06호선,648,2649,신내,6003,06:03:10,00:00:00,2632,2649,한강진,신내,3,2,,,G,


In [44]:
# Display the timetable collected for line 7.
results["7호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,07호선,751,3753,까치울,7016,05:48:00,05:48:30,3763,2711,석남,장암,1,1,,,G,
1,07호선,751,3753,까치울,7020,06:03:00,06:03:30,3763,2711,석남,장암,1,1,,,G,
2,07호선,751,3753,까치울,7024,06:16:00,06:16:30,3763,2712,석남,도봉산,1,1,,,G,
3,07호선,751,3753,까치울,7028,06:29:00,06:29:30,3763,2712,석남,도봉산,1,1,,,G,
4,07호선,751,3753,까치울,7032,06:40:30,06:41:00,3763,2712,석남,도봉산,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,07호선,750,2752,온수,7301,22:46:30,22:47:00,2711,3763,장암,석남,3,2,,,G,
158,07호선,750,2752,온수,7305,23:00:00,23:00:30,2712,3763,도봉산,석남,3,2,,,G,
159,07호선,750,2752,온수,7309,23:15:30,23:16:00,2712,3763,도봉산,석남,3,2,,,G,
160,07호선,750,2752,온수,7313,23:31:30,23:32:00,2712,3763,도봉산,석남,3,2,,,G,


In [45]:
# Display the timetable collected for line 8.
results["8호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,08호선,810,2811,암사,8290,25:00:30,00:00:00,2827,2811,모란,암사,1,1,,,G,
1,08호선,810,2811,암사,8288,24:41:30,00:00:00,2827,2811,모란,암사,1,1,,,G,
2,08호선,810,2811,암사,8286,24:25:30,00:00:00,2827,2811,모란,암사,1,1,,,G,
3,08호선,810,2811,암사,8284,24:09:30,00:00:00,2827,2811,모란,암사,1,1,,,G,
4,08호선,810,2811,암사,8282,23:54:30,00:00:00,2827,2811,모란,암사,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,08호선,827,2827,모란,8009,06:32:30,00:00:00,2811,2827,암사,모란,3,2,,,G,
121,08호선,827,2827,모란,8007,06:22:30,00:00:00,2811,2827,암사,모란,3,2,,,G,
122,08호선,827,2827,모란,8005,06:12:30,00:00:00,2811,2827,암사,모란,3,2,,,G,
123,08호선,827,2827,모란,8003,06:02:30,00:00:00,2811,2827,암사,모란,3,2,,,G,


In [46]:
# Display the timetable collected for line 9.
results["9호선"]

Unnamed: 0,LINE_NUM,FR_CODE,STATION_CD,STATION_NM,TRAIN_NO,ARRIVETIME,LEFTTIME,ORIGINSTATION,DESTSTATION,SUBWAYSNAME,SUBWAYENAME,WEEK_TAG,INOUT_TAG,FL_FLAG,DESTSTATION2,EXPRESS_YN,BRANCH_LINE
0,09호선,901,4101,개화,C9014,00:00:00,05:30:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
1,09호선,901,4101,개화,C9016,00:00:00,05:42:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
2,09호선,901,4101,개화,C9018,00:00:00,05:54:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
3,09호선,901,4101,개화,C9020,00:00:00,06:06:00,4101,4138,개화,중앙보훈병원,1,1,,,G,
4,09호선,901,4101,개화,C9022,00:00:00,06:14:15,4101,4138,개화,중앙보훈병원,1,1,,,G,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,09호선,926,4126,언주,C9191,23:08:50,23:09:15,4138,4101,중앙보훈병원,개화,3,2,,,G,
92,09호선,926,4126,언주,C9193,23:23:50,23:24:15,4138,4110,중앙보훈병원,염창,3,2,,,G,
93,09호선,926,4126,언주,C9195,23:34:45,23:35:10,4138,4115,중앙보훈병원,여의도,3,2,,,G,
94,09호선,926,4126,언주,C9197,23:48:45,23:49:10,4138,4120,중앙보훈병원,동작,3,2,,,G,
