In [None]:
import os
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import subprocess
import shutil
import tarfile
import xml.etree.ElementTree as ET
import re

def create_folder(folder_name):
    """Create a folder (including missing parents) and return its absolute path.

    Args:
        folder_name (str): Path of the folder to create; may be relative.

    Returns:
        str: Absolute path of ``folder_name``.

    Raises:
        FileExistsError: If ``folder_name`` exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate existence
    # check, so a folder created by someone else between "check" and
    # "create" can no longer make this fail.
    os.makedirs(folder_name, exist_ok=True)
    return os.path.abspath(folder_name)

def delete_folders(deletelist):
    """
    Remove every folder in ``deletelist`` together with its contents.

    deletelist (list): list whose items are all folder paths. A path that
    does not exist is reported on stdout and skipped.
    """
    for folder_name in deletelist:
        if not os.path.exists(folder_name):
            # Nothing to delete: report it and move on to the next path.
            print(f"資料夾 '{folder_name}' 不存在。")
            continue
        shutil.rmtree(folder_name)

def getdatelist(time1, time2):
    '''
    Build the list of days between two dates, both ends included.

    The two bounds may be passed in either order.

    Args:
        time1, time2 (str): Date strings in %Y-%m-%d form, e.g. "2025-01-24".

    Returns:
        list[str]: One "%Y%m%d" string per day, from the earlier bound to
        the later one.
    '''
    # Order the bounds as parsed timestamps, not as raw strings: string
    # comparison is wrong for non-zero-padded input ("2025-1-10" sorts
    # before "2025-1-9") and would produce an empty range.
    first = pd.to_datetime(time1)
    second = pd.to_datetime(time2)
    starttime, endtime = min(first, second), max(first, second)

    date_range = pd.date_range(start=starttime, end=endtime)
    return [d.strftime("%Y%m%d") for d in date_range]

def download_VD(url, downloadpath, timeout=30):
    """
    Download the file at ``url`` and save it to ``downloadpath``.

    Failures are printed rather than raised, so the caller must not assume
    that ``downloadpath`` exists (or is complete) after this returns.

    Args:
        url (str): URL of the XML file to download.
        downloadpath (str): Destination path, including the file name.
        timeout (float): Seconds to wait for the server before giving up.
    """
    try:
        # The context manager releases the streamed connection even when
        # writing fails part-way; the timeout keeps a stalled server from
        # blocking the call forever.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # turn HTTP error statuses into exceptions

            with open(downloadpath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

    except requests.exceptions.RequestException as e:
        print(f"下載時發生錯誤：{e}")
    except Exception as e:
        print(f"發生錯誤：{e}")

def read_xml(xml_file_path, return_raw=False):
    """
    Load an XML file from disk, either as raw text or as a parsed tree.

    Args:
        xml_file_path (str): Path of the XML file; it is read as UTF-8.
        return_raw (bool): When True, return the file content as a string
            without parsing it. Defaults to False (return the parsed root).

    Returns:
        ElementTree.Element or str: The parsed root element, or the raw
            text when ``return_raw`` is True.
        None: When the file does not exist or the XML cannot be parsed
            (a message is printed in both cases).
    """
    try:
        with open(xml_file_path, 'r', encoding='utf-8') as f:
            xml_content = f.read()
    except FileNotFoundError:
        print(f"檔案未找到：{xml_file_path}")
        return None

    # Raw mode never touches the parser, so it cannot fail on bad XML.
    if return_raw:
        return xml_content

    try:
        return ET.fromstring(xml_content)
    except ET.ParseError as e:
        print(f"解析 XML 檔案時發生錯誤：{e}")
        return None

def parse_vd_xml(xml_content):
    """
    Flatten VD XML text into a DataFrame.

    One row is produced per DetectionLink of each VD element; a VD without
    any DetectionLink contributes no rows. All values are kept as the text
    found in the XML.

    Args:
        xml_content (str): XML document text.

    Returns:
        pd.DataFrame: The flattened records.

    Raises:
        AttributeError: If a mandatory element is missing.
    """
    namespace = {'ns': 'http://traffic.transportdata.tw/standard/traffic/schema/'}
    root = ET.fromstring(xml_content)

    def required(node, path):
        # Mandatory element: a missing tag surfaces as AttributeError.
        return node.find(path, namespace).text

    def optional(node, path):
        # Optional element: a missing tag becomes an empty string.
        found = node.find(path, namespace)
        return '' if found is None else found.text

    columns = [
        "UpdateTime", "UpdateInterval", "AuthorityCode", "VDID", "SubAuthorityCode", "BiDirectional",
        "LinkID", "Bearing", "RoadDirection", "LaneNum", "ActualLaneNum", "VDType", "LocationType",
        "DetectionType", "PositionLon", "PositionLat", "RoadID", "RoadName", "RoadClass", "Start", "End", "LocationMile"
    ]

    # Document-level fields, repeated on every row.
    header = [
        required(root, 'ns:UpdateTime'),
        required(root, 'ns:UpdateInterval'),
        required(root, 'ns:AuthorityCode'),
    ]

    records = []
    for vd in root.findall('ns:VDs/ns:VD', namespace):
        # Fields placed before the per-link columns.
        vd_head = [
            required(vd, 'ns:VDID'),
            required(vd, 'ns:SubAuthorityCode'),
            required(vd, 'ns:BiDirectional'),
        ]
        # Fields placed after the per-link columns.
        vd_tail = [
            required(vd, 'ns:VDType'),
            required(vd, 'ns:LocationType'),
            required(vd, 'ns:DetectionType'),
            required(vd, 'ns:PositionLon'),
            required(vd, 'ns:PositionLat'),
            required(vd, 'ns:RoadID'),
            optional(vd, 'ns:RoadName'),
            optional(vd, 'ns:RoadClass'),
            optional(vd, 'ns:RoadSection/ns:Start'),
            optional(vd, 'ns:RoadSection/ns:End'),
            optional(vd, 'ns:LocationMile'),
        ]

        for link in vd.findall('ns:DetectionLinks/ns:DetectionLink', namespace):
            link_fields = [
                required(link, 'ns:LinkID'),
                required(link, 'ns:Bearing'),
                required(link, 'ns:RoadDirection'),
                required(link, 'ns:LaneNum'),
                required(link, 'ns:ActualLaneNum'),
            ]
            records.append(header + vd_head + link_fields + vd_tail)

    return pd.DataFrame(records, columns=columns)

def parse_vdlive_xml(xml_path):
    """
    Flatten a VDLive XML file into a DataFrame.

    One row is produced per Vehicle element, carrying the values of its
    enclosing Lane, LinkFlow and VDLive plus the document-level fields.
    All values are kept as the text found in the XML.

    Args:
        xml_path (str): Path of the VDLive XML file.

    Returns:
        pd.DataFrame: The flattened records. "Speed" holds the lane-level
        Speed element and "SpeedAvg" the Speed element of the Vehicle.
    """
    namespace = {'ns': 'http://traffic.transportdata.tw/standard/traffic/schema/'}
    root = ET.parse(xml_path).getroot()

    def text_of(node, path):
        # Text of a mandatory child; a missing tag raises AttributeError.
        return node.find(path, namespace).text

    columns = [
        "UpdateTime", "UpdateInterval", "AuthorityCode", "VDID", "LinkID", 
        "LaneID", "LaneType", "Speed", "Occupancy", "VehicleType", "Volume", 
        "SpeedAvg", "Status", "DataCollectTime"
    ]

    # Document-level fields, repeated on every row.
    header = [
        text_of(root, 'ns:UpdateTime'),
        text_of(root, 'ns:UpdateInterval'),
        text_of(root, 'ns:AuthorityCode'),
    ]

    records = []
    for vd in root.findall(".//ns:VDLive", namespace):
        vdid = text_of(vd, "ns:VDID")
        # Status and DataCollectTime belong to the VDLive but sit at the row end.
        trailer = [text_of(vd, "ns:Status"), text_of(vd, "ns:DataCollectTime")]

        for link_flow in vd.findall(".//ns:LinkFlow", namespace):
            link_id = text_of(link_flow, "ns:LinkID")

            for lane in link_flow.findall(".//ns:Lane", namespace):
                lane_fields = [
                    text_of(lane, "ns:LaneID"),
                    text_of(lane, "ns:LaneType"),
                    text_of(lane, "ns:Speed"),
                    text_of(lane, "ns:Occupancy"),
                ]

                for vehicle in lane.findall(".//ns:Vehicle", namespace):
                    vehicle_fields = [
                        text_of(vehicle, "ns:VehicleType"),
                        text_of(vehicle, "ns:Volume"),
                        text_of(vehicle, "ns:Speed"),
                    ]
                    records.append(
                        header + [vdid, link_id] + lane_fields + vehicle_fields + trailer
                    )

    return pd.DataFrame(records, columns=columns)

def vdlive_preliminary_process(df, vdlist = None):
    """
    Keep the VDLive rows with Volume > 0 and Status == 0.

    The input DataFrame is not modified; a new one is returned.

    Args:
        df (pd.DataFrame): Frame with at least 'Volume', 'Status' and 'VDID'
            columns; 'Volume' and 'Status' must be convertible to int64.
        vdlist (list-like, optional): VDIDs to keep. None or an empty
            collection disables this filter.

    Returns:
        pd.DataFrame: Filtered rows with 'Volume' and 'Status' as int64 and
        a fresh 0..n-1 index.
    """
    # assign() builds a new frame, so the caller's DataFrame keeps its
    # original column dtypes.
    df = df.assign(
        Volume=df['Volume'].astype('int64'),
        Status=df['Status'].astype('int64'),
    )
    keep = (df['Volume'] > 0) & (df['Status'] == 0)

    # len() rather than truthiness, so array-like filters (numpy arrays,
    # Series) work instead of raising on an ambiguous truth value.
    if vdlist is not None and len(vdlist) > 0:
        keep &= df['VDID'].isin(vdlist)

    return df[keep].reset_index(drop = True)

def findfiles(filefolderpath, filetype='.csv'):
    """
    Collect the paths of all files under a folder whose name ends with a suffix.

    Args:
        filefolderpath (str): Folder to search; sub-folders are included.
        filetype (str, optional): File-name suffix to match. Defaults to '.csv'.

    Returns:
        list: Paths of the matching files (empty when nothing matches).
    """
    # os.walk visits the folder and every sub-folder below it.
    return [
        os.path.join(root, file)
        for root, _, files in os.walk(filefolderpath)
        for file in files
        if file.endswith(filetype)
    ]

def download_and_extract_VD(url, datatype, date, downloadfolder, keep = False):
    '''
    Download one day of per-minute VDLive archives and decompress them.

    For every minute of the day, "<url>/<date>/VDLive_HHMM.xml.gz" is
    requested. Archives answered with HTTP 200 are stored in
    "<downloadfolder>/<date>/壓縮檔" and decompressed into
    "<downloadfolder>/<date>"; any other status is skipped.

    Args:
        url (str): Base URL of the archive; a trailing "/" is ignored.
        datatype (str): Currently unused; kept for interface compatibility.
        date (str): Day to download, used verbatim in the URL and folder name.
        downloadfolder (str): Root folder that receives the per-date folder.
        keep (bool): When True, keep the folder of downloaded .gz archives;
            otherwise it is deleted once all files are processed.

    Returns:
        str: Absolute path of the per-date folder holding the XML files.
    '''
    import gzip  # local import: gzip is not among this file's top-level imports

    def _extract_gz(gz_path, dest_folder):
        # Decompress "<name>.gz" into dest_folder as "<name>".
        # NOTE(review): no `extract_gz` helper is defined in the visible
        # source, so decompression is done here with the standard library.
        target = os.path.join(dest_folder, os.path.basename(gz_path)[:-len('.gz')])
        with gzip.open(gz_path, 'rb') as src, open(target, 'wb') as dst:
            shutil.copyfileobj(src, dst)

    base_url = url.rstrip('/')  # avoid "//" when the caller's URL ends with "/"
    downloadfolder = create_folder(os.path.join(downloadfolder, date))
    gzdownloadfolder = create_folder(os.path.join(downloadfolder, '壓縮檔'))
    for hour in range(24):
        for minute in range(60):
            filename = f"VDLive_{hour:02d}{minute:02d}.xml.gz"
            downloadurl = f"{base_url}/{date}/{filename}"
            destfile = os.path.join(gzdownloadfolder, filename)
            print(f"destfile : {destfile}" )
            # The timeout keeps one stalled request from hanging the whole day.
            response = requests.get(downloadurl, timeout=30)
            if response.status_code == 200:
                with open(destfile, 'wb') as file:
                    file.write(response.content)
                _extract_gz(destfile, downloadfolder)
    if not keep:
        # The archive location is a directory, so it needs rmtree;
        # os.remove only works on files.
        shutil.rmtree(gzdownloadfolder)
    return downloadfolder

def getdatelist(time1, time2):
    '''
    Build the list of days between two dates, both ends included.

    The two bounds may be passed in either order.

    NOTE: a function with this same name is defined earlier in this file;
    this later definition is the one in effect once the file has run.

    Args:
        time1, time2 (str): Date strings in %Y-%m-%d form, e.g. "2025-01-24".

    Returns:
        list[str]: One "%Y%m%d" string per day, from the earlier bound to
        the later one.
    '''
    # Order the bounds as parsed timestamps, not as raw strings: string
    # comparison is wrong for non-zero-padded input ("2025-1-10" sorts
    # before "2025-1-9") and would produce an empty range.
    first = pd.to_datetime(time1)
    second = pd.to_datetime(time2)
    starttime, endtime = min(first, second), max(first, second)

    date_range = pd.date_range(start=starttime, end=endtime)
    return [d.strftime("%Y%m%d") for d in date_range]

def VDfolder(datatype = 'VDlive'):
    """
    Create the working folders for one data type under the current directory.

    Args:
        datatype (str): Name of the top-level folder created in the current
            working directory.

    Returns:
        tuple: Absolute paths of the '0_rawdata', '1_merge' and '2_excel'
        sub-folders, in that order.
    """
    savelocation = create_folder(os.path.join(os.getcwd(), datatype))
    return tuple(
        create_folder(os.path.join(savelocation, subfolder))
        for subfolder in ('0_rawdata', '1_merge', '2_excel')
    )

def get_vd():
    """
    Download VD.xml, convert it to a DataFrame and save it as VD/VD.csv.

    The XML is stored at ./VD/xml/VD.xml and the CSV at ./VD/VD.csv, both
    relative to the current working directory.

    Returns:
        pd.DataFrame: The table produced by parse_vd_xml.

    Raises:
        FileNotFoundError: If VD.xml is not available after the download.
    """
    vdfolder = create_folder(os.path.join(os.getcwd(), 'VD'))
    vdxmlfolder = create_folder(os.path.join(vdfolder, 'xml'))
    vdpath = os.path.join(vdxmlfolder, 'VD.xml')
    download_VD(url = 'https://tisvcloud.freeway.gov.tw/history/motc20/VD.xml', downloadpath = vdpath)

    raw_xml = read_xml(vdpath, return_raw=True)
    if raw_xml is None:
        # download_VD only prints its errors, so a failed download shows up
        # here as a missing file; fail with a clear message instead of
        # passing None on to the XML parser.
        raise FileNotFoundError(f"無法讀取 VD 靜態資料：{vdpath}")

    VD = parse_vd_xml(raw_xml)
    VD.to_csv(os.path.join(vdfolder, 'VD.csv'), index = False)
    return VD

def check_pathexist(path):
    """Return True when ``path`` exists on the file system, otherwise False."""
    exists = os.path.exists(path)
    return exists


In [None]:
# ===== Step 0: parameters to adjust manually =====

# Date range to download (both ends included)
starttime = "2025-01-24"
endtime = "2025-01-24"
# Days in the range; getdatelist accepts the two bounds in either order.
datelist = getdatelist(starttime, endtime)

# VDIDs to keep when filtering.
# An earlier 25-entry assignment of this name was dead code (it was
# overwritten immediately by this list, whose first 25 entries are the same).
GuanxiVD = [
    'VD-N3-N-68-I-EN-21-龍潭', 'VD-N3-N-68-O-SE-1-龍潭', 'VD-N3-N-68-O-SE-21-龍潭',
    'VD-N3-N-68-O-SW-22-龍潭', 'VD-N3-N-68-I-WN-22-龍潭', 'VD-N3-S-80.400-M-LOOP',
    'VD-N3-S-85.010-M-RS', 'VD-N3-S-81.850-M-LOOP', 'VD-N3-N-69.510-M-LOOP',
    'VD-N3-N-76-I-WN-關西服務區', 'VD-N3-N-71.630-M-RS', 'VD-N3-S-68-I-WS-1-龍潭',
    'VD-N3-N-70.470-M-LOOP', 'VD-N3-S-68-O-NW-1-龍潭', 'VD-N3-S-67.000-M-LOOP',
    'VD-N3-S-79-I-ES-1-關西', 'VD-N3-N-80.260-M-LOOP', 'VD-N3-T-79-A-E-21-關西',
    'VD-N3-S-68.450-N-LOOP', 'VD-N3-N-79-I-EN-1-關西', 'VD-N3-S-70.000-M-LOOP',
    'VD-N3-N-78.760-N-LOOP', 'VD-N3-S-76-I-WS-關西服務區', 'VD-N3-N-77.250-M-LOOP',
    'VD-N3-T-79-A-E-22-關西', 'VD-N3-N-73-O-SE-22-高原', 'VD-N3-S-79-O-NE-1-關西',
    'VD-N3-S-68-O-NW-21-龍潭', 'VD-N3-N-85.990-M-LOOP', 'VD-N3-S-66.020-M-LOOP',
    'VD-N3-N-68.020-N-LOOP', 'VD-N3-S-74.140-M-RS', 'VD-N3-N-78.000-M-RS',
    'VD-N3-N-83.400-M-RS', 'VD-N3-N-73.100-M-龍潭地磅站', 'VD-N3-N-72-B-龍潭收費站',
    'VD-N3-S-73-O-NE-21-高原', 'VD-N3-S-64.500-M-RS', 'VD-N3-S-68-I-WS-21-龍潭',
    'VD-N3-S-76-O-NW-1-關西服務區', 'VD-N3-N-73.940-M-LOOP', 'VD-N3-N-79-O-SE-1-關西',
    'VD-N3-N-73-O-SE-1-高原', 'VD-N3-S-86.750-M-LOOP', 'VD-N3-S-73-O-NW-22-高原',
    'VD-N3-N-73-I-WN-21-高原', 'VD-N3-S-78.210-M-LOOP', 'VD-N3-N-81.800-M-LOOP',
    'VD-N3-S-77.260-M-RS', 'VD-N3-N-73-I-EN-22-高原', 'VD-N3-N-73-O-SW-21-高原',
    'VD-N3-S-73-O-NW-1-高原', 'VD-N3-S-72-B-龍潭收費站', 'VD-N3-S-71.000-M-LOOP',
    'VD-N3-S-83.750-M-LOOP', 'VD-N3-N-73.150-N-LOOP', 'VD-N3-S-73-I-WS-22-高原',
    'VD-N3-S-79.000-N-LOOP', 'VD-N3-N-75.490-M-RS', 'VD-N3-S-76.360-N-LOOP',
    'VD-N3-N-65.990-M-RS', 'VD-N3-S-73.020-N-LOOP', 'VD-N3-S-75.430-M-LOOP',
    'VD-N3-S-69.700-M-RS', 'VD-N3-S-73-I-ES-21-高原', 'VD-N3-N-76.490-N-LOOP',
    'VD-N3-N-85.010-M-LOOP', 'VD-N3-N-76-O-SW-1-關西服務區',
]

In [None]:
def VDlive(datelist = datelist , datatype = 'VD_live', vdlist = None):
    """
    Merge the per-minute VDLive XML files of each date into one CSV per date.

    For every date, the XML files under "<rawdata>/<date>" are parsed,
    filtered with vdlive_preliminary_process and concatenated. The result
    is written to "<merge>/<date>.csv". When that CSV already exists it is
    loaded instead, so a date is never merged twice.

    Args:
        datelist (list[str]): Dates to process; each is used as a folder and
            file name. Defaults to the module-level ``datelist`` as it was
            when this function was defined.
        datatype (str): Passed to VDfolder to select the working folder tree.
        vdlist (list[str], optional): VDIDs to keep; forwarded to
            vdlive_preliminary_process.

    Returns:
        dict[str, pd.DataFrame]: Merged data keyed by date. Dates with no
        XML files and no existing merged CSV are left out.
    """
    rawdatafolder, mergefolder, _ = VDfolder(datatype=datatype)
    merged_by_date = {}

    for date in datelist:
        # Step 1: download.
        # NOTE: downloading is currently disabled; the raw XML files are
        # expected to be in place already. To fetch them, call
        # download_and_extract_VD(url, datatype, date, downloadfolder=rawdatafolder)
        # with url = "https://tisvcloud.freeway.gov.tw/history/motc20/VD/".
        dowloadfilefolder = os.path.join(rawdatafolder, date)

        # Step 2: xml -> csv
        delete_folders([os.path.join(dowloadfilefolder, '壓縮檔')])

        VDlivemergename = os.path.join(mergefolder, f"{date}.csv")
        # Skip dates that already have a merged file (the user may re-run
        # with overlapping date ranges).
        if check_pathexist(VDlivemergename):
            merged_by_date[date] = pd.read_csv(VDlivemergename)
            continue

        # Sorted so the merged rows follow the file-name (time) order.
        filelist = sorted(findfiles(filefolderpath=dowloadfilefolder, filetype='.xml'))
        if not filelist:
            # pd.concat raises on an empty list, so report and move on.
            print(f"{date}: 找不到任何 xml 檔案，略過。")
            continue

        frames = [
            vdlive_preliminary_process(parse_vdlive_xml(filepath), vdlist=vdlist)
            for filepath in filelist
        ]
        VDLive = pd.concat(frames, ignore_index=True)
        VDLive.to_csv(VDlivemergename, index = False)
        merged_by_date[date] = VDLive

        # Step 3: hourly Volume statistics (not implemented yet)

    return merged_by_date
        




In [None]:
# Process every date in datelist, restricting the data to the VDIDs in GuanxiVD.
VDlive(datelist = datelist , datatype = 'VD_live', vdlist = GuanxiVD)

In [None]:
# NOTE(review): in the visible source VDLive is only assigned inside VDlive(),
# so this module-level reference looks undefined — confirm where it should come from.
VDLive

In [None]:
# NOTE(review): `get` is not defined in the visible source; this looks like an
# unfinished scratch cell.
get

In [None]:
# Download VD.xml, write VD/VD.csv and show the resulting DataFrame.
get_vd()

In [None]:
# NOTE(review): this scratch assignment has no right-hand side, which is a
# SyntaxError; it stays disabled until the intended value is known.
# dowloadfilefolder =

In [None]:
# Download VDLive.xml into ./VD/xml/ (folders are created when missing).
vdfolder = create_folder(os.path.join(os.getcwd(), 'VD'))
vdxmlfolder = create_folder(os.path.join(vdfolder, 'xml'))
vdpath = os.path.join(vdxmlfolder, 'VDLive.xml')
download_VD(
    url='https://tisvcloud.freeway.gov.tw/history/motc20/VDLive.xml',
    downloadpath=vdpath,
)

In [None]:
# Download and parse VD.xml; get_vd() also writes the table to VD/VD.csv.
VD = get_vd()

In [None]:
# Display the parsed VD table.
VD

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd



In [None]:
# Usage example
# NOTE(review): `vd` is not defined in the visible source; parse_vd_xml expects
# the XML text (e.g. from read_xml(path, return_raw=True)) — confirm the intended input.
vd_df = parse_vd_xml(vd)
vd_df