In [1]:
import os
import requests
import xml.etree.ElementTree as ET
from dotenv import load_dotenv

In [2]:
import numpy as np 
import pandas as pd
import os

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [3]:
# Load environment variables from the .env file
load_dotenv()

# Get the API key from the environment variable
api_key = os.getenv("DATA_SEOUL_APIKEY")

In [4]:

# Define the range of months and years
years = [2023, 2024]
months = list(range(1, 13))  # January to December

# Initialize an empty list to hold the data
all_data = []

# Function to fetch and parse data for a given year and month
def fetch_data(year, month):
    # Format the month to ensure two digits
    month_str = f"{month:02d}"
    date_str = f"{year}{month_str}"
    print(date_str)
    url = f"http://openapi.seoul.go.kr:8088/{api_key}/xml/CardBusTimeNew/1/1000/{date_str}/"
    
    res = requests.get(url)
    
    if res.status_code == 200:
        root = ET.fromstring(res.content)
        data = []
        for item in root.findall('.//row'):
            record = {child.tag: child.text for child in item}
            data.append(record)
        return data
    else:
        print(f"Error: Unable to fetch data for {date_str}. Status code: {res.status_code}")
        return []

In [5]:
# Loop through each year and month to fetch the data
for year in years:
    for month in months:
        if year == 2023 or (year == 2024 and month in range(1, 6)):
            data = fetch_data(year, month)
            all_data.extend(data)

# Convert the list of all records to a DataFrame
df = pd.DataFrame(all_data)

202301
202302
202303
202304
202305
202306
202307
202308
202309
202310
202311
202312
202401
202402
202403
202404
202405


In [6]:
df

Unnamed: 0,USE_MON,BUS_ROUTE_NO,BUS_ROUTE_NM,STND_BSST_ID,BSST_ARS_NO,BUS_STA_NM,MIDNIGHT_RIDE_NUM,MIDNIGHT_ALIGHT_NUM,ONE_RIDE_NUM,ONE_ALIGHT_NUM,...,TWENTY_ALIGHT_NUM,TWENTY_ONE_RIDE_NUM,TWENTY_ONE_ALIGHT_NUM,TWENTY_TWO_RIDE_NUM,TWENTY_TWO_ALIGHT_NUM,TWENTY_THREE_RIDE_NUM,TWENTY_THREE_ALIGHT_NUM,MNTN_TYP_CD,MNTN_TYP_NM,WORK_DT
0,202301,100,100번(하계동~용산구청),110000327,11428,한성여객종점(00001),0,2,0,2,...,0,5,2,3,2,0,0,010,서울간선버스,20230203
1,202301,100,100번(하계동~용산구청),110000335,11436,하계1동주민센터(00010),0,0,0,0,...,36,135,29,150,37,0,0,010,서울간선버스,20230203
2,202301,100,100번(하계동~용산구청),107000011,08101,숭곡초교입구(00100),149,98,16,24,...,243,247,233,158,308,135,192,010,서울간선버스,20230203
3,202301,100,100번(하계동~용산구청),107000012,08102,창문여고(00101),167,220,27,33,...,425,300,491,282,503,196,303,010,서울간선버스,20230203
4,202301,100,100번(하계동~용산구청),107000013,08103,동방고개(00102),11,151,6,32,...,176,45,173,28,204,21,164,010,서울간선버스,20230203
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15995,202404,143,143번(정릉~개포동),990074870,~,대진여객차고지(가상)(00001),0,4,0,0,...,105,236,153,152,127,57,60,010,서울간선버스,20240503
15996,202404,143,143번(정릉~개포동),107000005,08005,미아리고개.미아리예술극장(00010),0,0,0,0,...,468,119,355,40,145,0,0,010,서울간선버스,20240503
15997,202404,143,143번(정릉~개포동),100000133,01229,혜화역.동성중고(장면총리가옥)(00100),412,265,86,31,...,365,854,341,856,354,425,272,010,서울간선버스,20240503
15998,202404,143,143번(정릉~개포동),107000010,08010,삼선교.한성대학교.조소앙활동터(00101),325,462,125,75,...,912,578,875,613,845,300,579,010,서울간선버스,20240503


In [7]:
df.columns

Index([], dtype='object')

In [8]:
# 사용년월 열의 고유한 값들을 모음
unique_usage_months = df['USE_MON'].unique()

# 고유한 사용년월 출력
print(unique_usage_months)

KeyError: 'USE_MON'

In [None]:
# 사용년월 열의 고유한 값들을 모음
bus_route_no = df['BUS_ROUTE_NO'].unique()

# 고유한 사용년월 출력
print(bus_route_no)

In [None]:
# 사용년월 열의 고유한 값들을 모음
bus_route_no = df['BUS_ROUTE_NM'].unique()

# 고유한 사용년월 출력
print(bus_route_no)

In [None]:
# 사용년월 열의 고유한 값들을 모음
unique_usage_months = df = (df['USE_MON'] == 202404)

# 고유한 사용년월 출력
print(unique_usage_months)