In [None]:
!pip install requests

In [None]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import time
from datetime import datetime

## Data collection

In [None]:
# Enter your API keys (12 keys are needed to collect 16 stations at 10-second intervals for 2 hours)
keys = ['', '', '', '', '', '', '', '', '', '', '', '']

In [None]:
# Collection settings: which stations to poll, for how long, and how often.
stations = [
    '서울', '신도림', '금천구청', '왕십리',
    '약수', '고속터미널', '명동', '수리산',
    '명일', '부평', '안암(고대병원앞)', '군포',
    '금정', '사당', '강남', '성수',
]
duration_min = 120  # total collection time, in minutes
interval_sec = 10   # pause between polling rounds, in seconds

# Index into `keys` of the API key to use for the next request.
key_idx = 0

# Month and day of the run as a four-digit "MMDD" string.
today = datetime.now().strftime("%m%d")

# One accumulating DataFrame per station; each starts out empty and distinct.
dfs = dict((name, pd.DataFrame()) for name in stations)

def collect_to_df(station, key, timeout=5):
    """Fetch real-time arrival data for one station and append it to ``dfs``.

    Requests the ``realtimeStationArrival`` XML endpoint for ``station``,
    turns every ``<row>`` element into a dict mapping child tag to child
    text, and concatenates the resulting rows onto ``dfs[station]``.

    Failures are reported with ``print`` and never raised, so a single bad
    poll cannot abort a long-running collection loop.

    Args:
        station: Station name, used both in the request URL and as the key
            into the module-level ``dfs`` dict.
        key: API key inserted into the request URL.
        timeout: Seconds to wait for the server before giving up on this
            request.

    Returns:
        None. The module-level ``dfs`` dict is updated as a side effect.
    """
    url = f'http://swopenAPI.seoul.go.kr/api/subway/{key}/xml/realtimeStationArrival/0/100/{station}'
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze every later poll as well.
        response = requests.get(url, timeout=timeout)
        # Report HTTP errors as such instead of as an XML parse failure.
        response.raise_for_status()
        # Parse the raw bytes so the XML parser handles decoding itself.
        rows = ET.fromstring(response.content).findall('row')
    except Exception as e:
        # Deliberately broad: any network or parse problem skips this poll only.
        print(f'{station}: request failed ({e})')
        return

    if not rows:
        print(f'{station}: no data received')
        return

    records = [{child.tag: child.text for child in row} for row in rows]
    df_new = pd.DataFrame(records)
    dfs[station] = pd.concat([dfs[station], df_new], ignore_index=True)
    print(f'{station}: collected {len(records)} records')

# Poll every station once per round, rotating through the API keys, then
# write one CSV per station. Data is saved even if the run is interrupted.
print(f'Starting {interval_sec} seconds interval data collection for {duration_min} minutes...\n')
total_rounds = (duration_min * 60) // interval_sec

try:
    for round_no in range(total_rounds):
        round_start = time.monotonic()

        for station in stations:
            collect_to_df(station, keys[key_idx])
            # Rotate to the next key after every request.
            key_idx = (key_idx + 1) % len(keys)

        # Sleep only for what is left of the interval, so the time spent on
        # the requests themselves does not stretch each round. No wait is
        # needed once the final round is done.
        if round_no < total_rounds - 1:
            remaining = interval_sec - (time.monotonic() - round_start)
            if remaining > 0:
                time.sleep(remaining)

    finished = True
except KeyboardInterrupt:
    finished = False
    print('\nData collection interrupted by user.')

# Saving happens outside the try block so that an interrupted run still
# keeps whatever was collected up to that point.
for station in stations:
    if dfs[station].empty:
        # Nothing was collected for this station; avoid writing an empty file.
        print(f'{station}: no data collected, nothing saved')
        continue
    filename = f'{station}_{today}_morning.csv'
    dfs[station].to_csv(filename, index=False, encoding='utf-8-sig')
    print(f'{station}: saved as {filename}')

if finished:
    print('\nAll data collection complete.')