In [8]:
import xml.etree.ElementTree as ET
import pandas as pd
import os 
import geopandas as gpd

# 00 Setup
def findfiles(filefolderpath, filetype='.csv', recursive=True):
    """
    尋找指定路徑下指定類型的檔案，並返回檔案路徑列表。

    Args:
        filefolderpath (str): 指定的檔案路徑。
        filetype (str, optional): 要尋找的檔案類型，預設為 '.csv'。
        recursive (bool, optional): 是否檢索所有子資料夾，預設為 True；反之為False，僅查找當前資料夾的所有file。

    Returns:
        list: 包含所有符合條件的檔案路徑的列表。
    """
    filelist = []

    if recursive:
        # 遍歷資料夾及其子資料夾
        for root, _, files in os.walk(filefolderpath):
            for file in files:
                if file.endswith(filetype):
                    file_path = os.path.join(root, file)
                    filelist.append(file_path)
    else:
        # 僅檢索當前資料夾
        for file in os.listdir(filefolderpath):
            file_path = os.path.join(filefolderpath, file)
            if os.path.isfile(file_path) and file.endswith(filetype):
                filelist.append(file_path)

    return filelist

# 01 讀取站序
def read_bus_stop_of_route_xml(xml_path: str) -> pd.DataFrame:
    """
    讀取 TDX 公車站序 XML（BusStopOfRoute），回傳整理好的 pandas DataFrame。
    
    每一列 = 一個站牌（Stop），同時附上路線 / 營運業者資訊。
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # 自動從 root 解析出 namespace（避免寫死）
    if root.tag.startswith("{"):
        uri = root.tag.split("}")[0].strip("{")
    else:
        uri = "https://ptx.transportdata.tw/standard/schema/"
    ns = {"ns": uri}

    def gettext(elem, path):
        """安全取 text，找不到就回 None"""
        if elem is None:
            return None
        child = elem.find(path, ns)
        return child.text if child is not None else None

    rows = []

    # 每一個 <BusStopOfRoute> 代表一條路線 + 方向
    for bsr in root.findall("ns:BusStopOfRoute", ns):

        # 路線共同欄位
        base = {
            "RouteUID":          gettext(bsr, "ns:RouteUID"),
            "RouteID":           gettext(bsr, "ns:RouteID"),
            "RouteName_Zh":      gettext(bsr, "ns:RouteName/ns:Zh_tw"),
            "RouteName_En":      gettext(bsr, "ns:RouteName/ns:En"),
            "SubRouteUID":       gettext(bsr, "ns:SubRouteUID"),
            "SubRouteID":        gettext(bsr, "ns:SubRouteID"),
            "SubRouteName_Zh":   gettext(bsr, "ns:SubRouteName/ns:Zh_tw"),
            "SubRouteName_En":   gettext(bsr, "ns:SubRouteName/ns:En"),
            "Direction":         gettext(bsr, "ns:Direction"),
            "City":              gettext(bsr, "ns:City"),
            "CityCode":          gettext(bsr, "ns:CityCode"),
            "OperatorID":        gettext(bsr, "ns:Operators/ns:Operator/ns:OperatorID"),
            "OperatorName_Zh":   gettext(bsr, "ns:Operators/ns:Operator/ns:OperatorName/ns:Zh_tw"),
            "OperatorNo":        gettext(bsr, "ns:Operators/ns:Operator/ns:OperatorNo"),
        }

        # 底下所有 <Stop>
        for stop in bsr.findall("ns:Stops/ns:Stop", ns):
            row = base.copy()
            row.update({
                "StopUID":          gettext(stop, "ns:StopUID"),
                "StopID":           gettext(stop, "ns:StopID"),
                "StopName_Zh":      gettext(stop, "ns:StopName/ns:Zh_tw"),
                "StopName_En":      gettext(stop, "ns:StopName/ns:En"),
                "StopBoarding":     gettext(stop, "ns:StopBoarding"),
                "StopSequence":     gettext(stop, "ns:StopSequence"),
                "PositionLon":      gettext(stop, "ns:StopPosition/ns:PositionLon"),
                "PositionLat":      gettext(stop, "ns:StopPosition/ns:PositionLat"),
                "GeoHash":          gettext(stop, "ns:StopPosition/ns:GeoHash"),
                "StationID":        gettext(stop, "ns:StationID"),
                "StationGroupID":   gettext(stop, "ns:StationGroupID"),
                "LocationCityCode": gettext(stop, "ns:LocationCityCode"),
            })
            rows.append(row)

    df = pd.DataFrame(rows)

    # 可選：把數值欄位轉型（如果你需要的話）
    for col in ["StopSequence"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="ignore")
    for col in ["PositionLon", "PositionLat"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="ignore")

    return df


In [3]:
# 01_讀取站序xml

# 讀取所有站序 xml，轉存為 csv
busstopseq_folder = os.path.join(os.getcwd(), '..', "00_TDX資料下載", "01公車站序資料")
xml_files = findfiles(busstopseq_folder, filetype='.xml', recursive=False)

for xmlfile in xml_files:
    df = read_bus_stop_of_route_xml(xmlfile)
    df.to_csv(xmlfile.replace('.xml', '.csv'), index=False, encoding='utf-8-sig')

# 整併所有的csv
csv_files = findfiles(busstopseq_folder, filetype='.csv', recursive=False)
all_dfs = [pd.read_csv(f) for f in csv_files]
df_seq = pd.concat(all_dfs, ignore_index=True)

  df[col] = pd.to_numeric(df[col], errors="ignore")
  df[col] = pd.to_numeric(df[col], errors="ignore")


In [13]:
# 02 讀取shp和seq比對是否每個路線都有對應的站序資料

route_gdf = gpd.read_file(r"D:\B-Project\2025\6800\Technical\12票證資料\其他分析資料\市區公車路線資料\新北市公車路線\B1F0230003V089.shp")
df_seq = pd.read_csv(r'D:\B-Project\2025\6800\Technical\12票證資料\TicketAnalysis\00_TDX資料下載\01公車站序資料\公車站序資料_新北市_2025-11-21.xml')


In [15]:
route_gdf.head()

Unnamed: 0,RouteUID,RouteID,Direction,OperatorID,AuthorityI,ProviderID,BusRouteTy,RouteNameZ,RouteNameE,DepartureS,...,Destinatio,Destinat_1,TicketPric,TicketPr_1,FareBuffer,FareBuff_1,RouteMapIm,Updatetime,UniqueID,geometry
0,NWT16871,16871,0.0,16235,5,15,11,F239,F239,林口發電廠,...,林口加油站,Linkou Gas Sta.,,,,,https://ebus.gov.taipei/MapOverview?nid=045402...,2025-10-30,B1F0230003V089,"LINESTRING (121.29834 25.1176, 121.29885 25.11..."
1,NWT16901,16901,0.0,16176,5,15,11,574,574,信義國小,...,南天母廣場,Nantianmu Square,一段票,Charge for 1 sections,,,https://ebus.gov.taipei/MapOverview?nid=040005...,2025-10-30,B1F0230003V089,"LINESTRING (121.45397 24.9914, 121.45542 24.99..."
2,NWT16930,16930,0.0,16181,5,15,11,592,592,寶興路60巷口,...,捷運七張站,MRT Qizhang Sta.,一段票,1 Segment,,,https://ebus.gov.taipei/MapOverview?nid=045407...,2025-10-30,B1F0230003V089,"MULTILINESTRING ((121.53968 24.97846, 121.5397..."
3,NWT16963,16963,0.0,16250,5,15,11,F822,F822,平溪,...,東勢派出所,Dongshige Police Sta.,免費,Free,,,https://ebus.gov.taipei/MapOverview?nid=045408...,2025-10-30,B1F0230003V089,"LINESTRING (121.73804 25.02457, 121.73823 25.0..."
4,NWT16984,16984,0.0,16253,5,15,11,F931-1030,F931-1030,金山區衛生所,...,金山區公所,Jinshan District Office,,,,,https://ebus.gov.taipei/MapOverview?nid=045409...,2025-10-30,B1F0230003V089,"LINESTRING (121.64093 25.22064, 121.64116 25.2..."


In [17]:
df_seq.columns

Index(['<?xml version="1.0" encoding="utf-8"?><ArrayOfBusStopOfRoute xsi:schemaLocation="https://ptx.transportdata.tw/standard/schema/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="https://ptx.transportdata.tw/standard/schema/"><BusStopOfRoute><RouteUID>NWT10116</RouteUID><RouteID>10116</RouteID><RouteName><Zh_tw>242</Zh_tw><En>242</En></RouteName><Operators><Operator><OperatorID>16176</OperatorID><OperatorName><Zh_tw>臺北客運</Zh_tw><En>Taipei Bus Co.',
       ' Ltd.</En></OperatorName><OperatorCode>TaipeiBus</OperatorCode><OperatorNo>1407</OperatorNo></Operator></Operators><SubRouteUID>NWT101160</SubRouteUID><SubRouteID>101160</SubRouteID><SubRouteName><Zh_tw>242</Zh_tw><En>242</En></SubRouteName><Direction>0</Direction><City>NewTaipei</City><CityCode>NWT</CityCode><Stops><Stop><StopUID>NWT20146</StopUID><StopID>20146</StopID><StopName><Zh_tw>中和站</Zh_tw><En>Zhonghe Stop</En></StopName><StopBoarding>0</StopBoarding><StopSequence>1</StopSequence><StopPosition><PositionLon>1