In [37]:
import os
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import subprocess
import shutil
import tarfile
import xml.etree.ElementTree as ET
import re

def create_folder(folder_name):
    """
    Create a folder (with any missing parent folders) and return its absolute path.

    Calling this for a folder that already exists is a no-op.

    Args:
        folder_name (str): Path of the folder to create; may be relative.

    Returns:
        str: Absolute path of ``folder_name``.

    Raises:
        FileExistsError: If ``folder_name`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, so there is no window
    # in which another process can create the folder between check and creation.
    os.makedirs(folder_name, exist_ok=True)
    return os.path.abspath(folder_name)

def delete_folders(deletelist):
    """
    Delete folders together with everything inside them.

    Paths that do not exist are reported with a printed message and skipped.

    Args:
        deletelist (list | str | os.PathLike): Folder paths to delete. A single
            path is also accepted and handled as a one-element list.
    """
    # A bare string is itself iterable: looping over "/tmp/x" would visit "/",
    # "t", "m", ... and hand "/" to rmtree. Wrap single paths before iterating.
    if isinstance(deletelist, (str, bytes, os.PathLike)):
        targets = [deletelist]
    else:
        targets = deletelist

    for folder_name in targets:
        if os.path.exists(folder_name):  # only remove what is actually there
            shutil.rmtree(folder_name)  # removes the folder and its contents
        else:
            print(f"資料夾 '{folder_name}' 不存在。")

def getdatelist(time1, time2):
    """
    Build the list of days between two dates, both ends included.

    The two dates may be passed in either order; the earlier one is used as the
    start of the range.

    Args:
        time1 (str): A date that pandas can parse, e.g. "2025-02-07" (%Y-%m-%d).
        time2 (str): A date that pandas can parse, e.g. "2025-02-07" (%Y-%m-%d).

    Returns:
        list[str]: One "%Y%m%d" string per day, in ascending order.
    """
    # Order the dates after parsing them. Comparing the raw strings is a
    # lexicographic comparison, which picks the wrong start date as soon as the
    # two inputs differ in separator or zero padding.
    first, second = pd.Timestamp(time1), pd.Timestamp(time2)
    starttime, endtime = min(first, second), max(first, second)

    date_range = pd.date_range(start=starttime, end=endtime)
    return [d.strftime("%Y%m%d") for d in date_range]

def download_VD(url, downloadpath, timeout=30):
    """
    Download the file at ``url`` and save it to ``downloadpath``.

    The body is streamed to a temporary ``<downloadpath>.part`` file and only
    renamed to ``downloadpath`` once the transfer has finished, so a failed
    download never leaves a truncated file (or destroys a previous good copy)
    at the destination.

    Errors are reported with a printed message rather than raised.

    Args:
        url (str): Address of the XML file to download.
        downloadpath (str): Destination path, including the file name.
        timeout (float | tuple): Timeout in seconds passed to ``requests.get``.
            Without one, a stalled server would block the call indefinitely.

    Returns:
        bool: True if the file was saved, False if the download failed.
    """
    partial_path = f"{downloadpath}.part"
    saved = False

    try:
        # The context manager releases the streamed connection on every exit path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # turn HTTP error statuses into exceptions

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)

        # Publish the finished file in one step.
        os.replace(partial_path, downloadpath)
        saved = True

    except requests.exceptions.RequestException as e:
        print(f"下載時發生錯誤：{e}")
    except Exception as e:
        print(f"發生錯誤：{e}")
    finally:
        # Best-effort removal of a half-written temporary file.
        if not saved and os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError as e:
                print(f"發生錯誤：{e}")

    return saved

def read_xml(xml_file_path, return_raw=False):
    """
    Read an XML file (decoded as UTF-8) and return either its text or its parsed root.

    Args:
        xml_file_path (str): Path of the XML file.
        return_raw (bool): When True, return the file content as a string
            without parsing it. Defaults to False.

    Returns:
        xml.etree.ElementTree.Element | str | None: The parsed root element, or
        the raw text when ``return_raw`` is True. None when the file does not
        exist or the XML cannot be parsed (a message is printed in both cases).
    """
    try:
        with open(xml_file_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()

        # fromstring() already hands back the root element of the document.
        return raw_text if return_raw else ET.fromstring(raw_text)
    except FileNotFoundError:
        print(f"檔案未找到：{xml_file_path}")
    except ET.ParseError as e:
        print(f"解析 XML 檔案時發生錯誤：{e}")
    return None

def parse_vd_xml(xml_content):
    """
    Parse VD XML text into a DataFrame with one row per detection link.

    Every row repeats the document-level fields (UpdateTime, UpdateInterval,
    AuthorityCode) and the fields of the enclosing VD element, followed by the
    fields of one DetectionLink. A VD element without any DetectionLink
    therefore contributes no rows.

    Any element that is absent from the document yields an empty string in its
    column. An element that is present but empty keeps ElementTree's ``None``
    text. All values are left as the strings found in the XML.

    Args:
        xml_content (str): The XML document as text.

    Returns:
        pd.DataFrame: The parsed rows, with a fixed column order.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not well-formed XML.
    """
    namespace = {'ns': 'http://traffic.transportdata.tw/standard/traffic/schema/'}

    def _text(parent, path):
        """Return the text of the first ``path`` match under ``parent``, or '' if absent."""
        node = parent.find(path, namespace)
        return node.text if node is not None else ''

    # (column name, element path) pairs, grouped by the element they are read from.
    header_fields = (
        ("UpdateTime", 'ns:UpdateTime'),
        ("UpdateInterval", 'ns:UpdateInterval'),
        ("AuthorityCode", 'ns:AuthorityCode'),
    )
    vd_fields = (
        ("VDID", 'ns:VDID'),
        ("SubAuthorityCode", 'ns:SubAuthorityCode'),
        ("BiDirectional", 'ns:BiDirectional'),
        ("VDType", 'ns:VDType'),
        ("LocationType", 'ns:LocationType'),
        ("DetectionType", 'ns:DetectionType'),
        ("PositionLon", 'ns:PositionLon'),
        ("PositionLat", 'ns:PositionLat'),
        ("RoadID", 'ns:RoadID'),
        ("RoadName", 'ns:RoadName'),
        ("RoadClass", 'ns:RoadClass'),
        ("Start", 'ns:RoadSection/ns:Start'),
        ("End", 'ns:RoadSection/ns:End'),
        ("LocationMile", 'ns:LocationMile'),
    )
    link_fields = (
        ("LinkID", 'ns:LinkID'),
        ("Bearing", 'ns:Bearing'),
        ("RoadDirection", 'ns:RoadDirection'),
        ("LaneNum", 'ns:LaneNum'),
        ("ActualLaneNum", 'ns:ActualLaneNum'),
    )

    # Output column order; passing it to DataFrame also fixes the columns of an
    # empty result.
    columns = [
        "UpdateTime", "UpdateInterval", "AuthorityCode", "VDID", "SubAuthorityCode", "BiDirectional",
        "LinkID", "Bearing", "RoadDirection", "LaneNum", "ActualLaneNum", "VDType", "LocationType",
        "DetectionType", "PositionLon", "PositionLat", "RoadID", "RoadName", "RoadClass", "Start", "End", "LocationMile"
    ]

    root = ET.fromstring(xml_content)

    # Document-level information, shared by every row.
    header_values = {column: _text(root, path) for column, path in header_fields}

    records = []
    for vd in root.findall('ns:VDs/ns:VD', namespace):
        vd_values = {column: _text(vd, path) for column, path in vd_fields}

        # One output row per detection link of this VD.
        for link in vd.findall('ns:DetectionLinks/ns:DetectionLink', namespace):
            link_values = {column: _text(link, path) for column, path in link_fields}
            records.append({**header_values, **vd_values, **link_values})

    return pd.DataFrame(records, columns=columns)

def get_vd(url='https://tisvcloud.freeway.gov.tw/history/motc20/VD.xml'):
    """
    Download the VD XML file and return its content as a DataFrame.

    The file is stored as ``VD/xml/VD.xml`` under the current working
    directory; both folders are created when missing.

    Args:
        url (str): Address of the VD XML file. Defaults to the address this
            notebook has always used.

    Returns:
        pd.DataFrame: The result of ``parse_vd_xml`` for the downloaded file.

    Raises:
        FileNotFoundError: If no file is present at the download location after
            the download attempt.
    """
    vdfolder = create_folder(os.path.join(os.getcwd(), 'VD'))
    vdxmlfolder = create_folder(os.path.join(vdfolder, 'xml'))
    vdpath = os.path.join(vdxmlfolder, 'VD.xml')

    download_VD(url=url, downloadpath=vdpath)

    # read_xml reports a missing file by returning None; stop here with a clear
    # error instead of passing None on to the XML parser.
    # NOTE(review): a file left over from an earlier run is still parsed if the
    # download above failed, because that failure is only printed.
    raw_xml = read_xml(vdpath, return_raw=True)
    if raw_xml is None:
        raise FileNotFoundError(f"無法讀取 VD 檔案：{vdpath}")

    return parse_vd_xml(raw_xml)



In [38]:
# Download the VD XML file and parse it into a DataFrame (see get_vd).
VD = get_vd()

In [39]:
# Display the parsed frame as this cell's output.
VD

Unnamed: 0,UpdateTime,UpdateInterval,AuthorityCode,VDID,SubAuthorityCode,BiDirectional,LinkID,Bearing,RoadDirection,LaneNum,...,LocationType,DetectionType,PositionLon,PositionLat,RoadID,RoadName,RoadClass,Start,End,LocationMile
0,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-S-15.700-M-LOOP,NFB-NR,0,0000300001550A,SW,S,5,...,5,1,121.62179,25.036867,000030,國道3號,0,,,15K+700
1,2025-02-07T00:00:00+08:00,86400,NFB,VD-N2-E-7.900-M-LOOP,NFB-NR,0,0000200000700H,E,E,4,...,5,1,121.266,25.0267,000020,國道2號,0,,,7K+900
2,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-S-152.800-M-RS,NFB-CR,0,0000300015200K,S,S,3,...,5,1,120.68301,24.430344,000030,國道3號,0,通霄交流道,苑裡交流道,152K+800
3,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-N-266.672-M-RS,NFB-SR,0,0000100126600Q,NE,N,3,...,1,1,120.379875,23.4759,000010,國道1號,0,水上交流道,嘉義交流道,266K+672
4,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-N-374.270-M-RS,NFB-SR,0,0000101176090E,NE,N,2,...,5,1,120.3179944,22.5783373,000010,國道1號,0,高雄端,漁港路交流道,374K+270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3254,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-S-176-I-WS-1X-南下入口2,NFB-CR,0,0000301034060B,SE,S,2,...,5,2,120.59075,24.248,000030,國道3號,0,沙鹿交流道,龍井交流道,176K+000
3255,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-N-276.010-M-Loop,NFB-SR,0,0000100127600D,N,N,3,...,5,1,120.35527,23.399374,000010,國道1號,0,新營服務區,嘉義系統交流道,276K+010
3256,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-N-186.072-M-RS,NFB-CR,0,0000300118600B,NE,N,3,...,5,1,120.54896,24.178804,000030,國道3號,0,和美交流道,龍井交流道,186K+072
3257,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-S-54.410-M-LOOP,NFB-NR,0,0000100005400H,SW,S,4,...,5,1,121.257484,25.00648,000010,國道1號,0,機場系統交流道,中壢服務區,54K+410


In [31]:
import xml.etree.ElementTree as ET
import pandas as pd



In [33]:
# Usage example
# NOTE(review): `vd` is not defined in any cell visible here — presumably it is
# the raw VD XML text (what read_xml(..., return_raw=True) returns). Confirm it
# is assigned before this cell runs on a fresh kernel.
vd_df = parse_vd_xml(vd)
vd_df

Unnamed: 0,UpdateTime,UpdateInterval,AuthorityCode,VDID,SubAuthorityCode,BiDirectional,LinkID,Bearing,RoadDirection,LaneNum,...,LocationType,DetectionType,PositionLon,PositionLat,RoadID,RoadName,RoadClass,Start,End,LocationMile
0,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-S-15.700-M-LOOP,NFB-NR,0,0000300001550A,SW,S,5,...,5,1,121.62179,25.036867,000030,國道3號,0,,,15K+700
1,2025-02-07T00:00:00+08:00,86400,NFB,VD-N2-E-7.900-M-LOOP,NFB-NR,0,0000200000700H,E,E,4,...,5,1,121.266,25.0267,000020,國道2號,0,,,7K+900
2,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-S-152.800-M-RS,NFB-CR,0,0000300015200K,S,S,3,...,5,1,120.68301,24.430344,000030,國道3號,0,通霄交流道,苑裡交流道,152K+800
3,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-N-266.672-M-RS,NFB-SR,0,0000100126600Q,NE,N,3,...,1,1,120.379875,23.4759,000010,國道1號,0,水上交流道,嘉義交流道,266K+672
4,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-N-374.270-M-RS,NFB-SR,0,0000101176090E,NE,N,2,...,5,1,120.3179944,22.5783373,000010,國道1號,0,高雄端,漁港路交流道,374K+270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3254,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-S-176-I-WS-1X-南下入口2,NFB-CR,0,0000301034060B,SE,S,2,...,5,2,120.59075,24.248,000030,國道3號,0,沙鹿交流道,龍井交流道,176K+000
3255,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-N-276.010-M-Loop,NFB-SR,0,0000100127600D,N,N,3,...,5,1,120.35527,23.399374,000010,國道1號,0,新營服務區,嘉義系統交流道,276K+010
3256,2025-02-07T00:00:00+08:00,86400,NFB,VD-N3-N-186.072-M-RS,NFB-CR,0,0000300118600B,NE,N,3,...,5,1,120.54896,24.178804,000030,國道3號,0,和美交流道,龍井交流道,186K+072
3257,2025-02-07T00:00:00+08:00,86400,NFB,VD-N1-S-54.410-M-LOOP,NFB-NR,0,0000100005400H,SW,S,4,...,5,1,121.257484,25.00648,000010,國道1號,0,機場系統交流道,中壢服務區,54K+410
