# M03A

M03A 資料可用來統計各門架(Gantry ID)的**通過量**,  
並可據此分析路段的道路服務水準。

In [None]:
import os
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import subprocess
import shutil

In [3]:
def create_folder(folder_name):
    """Create a folder (with any missing parents) and return its absolute path.

    Args:
        folder_name: Path of the folder to create; may be relative.

    Returns:
        The absolute path of ``folder_name``.

    Raises:
        FileExistsError: If ``folder_name`` already exists but is not a
            directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which was
    # race-prone and silently accepted an existing non-directory path.
    os.makedirs(folder_name, exist_ok=True)
    return os.path.abspath(folder_name)

def delete_folders(deletelist):
    """Remove every folder listed in ``deletelist`` together with its contents.

    Args:
        deletelist (list): Folder paths to delete. A path that does not exist
            is reported on stdout and skipped.
    """
    for folder_name in deletelist:
        # Guard clause: nothing to remove, just report it and move on.
        if not os.path.exists(folder_name):
            print(f"資料夾 '{folder_name}' 不存在。")
            continue
        # Recursively delete the folder and everything inside it.
        shutil.rmtree(folder_name)

def getdatelist(time1, time2):
    """Build the list of calendar days between two dates, both ends included.

    The two dates may be given in either order; the earlier one is used as
    the start of the range.

    Args:
        time1: A date, normally a ``YYYY-MM-DD`` string (e.g. "2024-07-16").
        time2: A date in the same format as ``time1``.

    Returns:
        list[str]: One ``YYYYMMDD`` string per day, in ascending order.
    """
    # Compare parsed timestamps rather than raw strings: a string comparison
    # orders "2024-9-30" after "2024-10-01" and would produce an empty range.
    first = pd.Timestamp(time1)
    second = pd.Timestamp(time2)
    starttime = min(first, second)
    endtime = max(first, second)

    date_range = pd.date_range(start=starttime, end=endtime)
    return [d.strftime("%Y%m%d") for d in date_range]

In [6]:
# ===== Step 0: parameters to adjust by hand =====

# Two items need to be adjusted:
# 1. The data type to download
datatype = "M03A"  # Data type (e.g., M03A, M06A, M05A)

# 2. The date range to download
starttime = "2024-07-16"
endtime = "2024-07-19"
# Pass the dates in (start, end) order; getdatelist returns one YYYYMMDD
# string per day of the range.
datelist = getdatelist(starttime, endtime)

# Create the folders used by the later processing steps
savelocation = create_folder(os.path.join(os.getcwd(), datatype))
rawdatafolder = create_folder(os.path.join(savelocation, '0_rawdata'))
mergefolder = create_folder(os.path.join(savelocation, '1_merge'))
excelfolder = create_folder(os.path.join(savelocation, '2_excel'))

# Base address of the download site; the data type is appended to it
basicurl = "https://tisvcloud.freeway.gov.tw/history/TDCS/"
url = basicurl + datatype



In [None]:
# WARNING: removes the whole `savelocation` tree, including the 0_rawdata,
# 1_merge and 2_excel sub-folders created under it. Re-run the setup cell
# afterwards to recreate the folders before continuing.
delete_folders([savelocation])

In [7]:
# URL of the archive for the first day in `datelist`,
# following the pattern <url>/<datatype>_<YYYYMMDD>.tar.gz
downloadurl = f"{url}/{datatype}_{datelist[0]}.tar.gz"

In [None]:
# Bare expression: presumably left here so the notebook displays the URL for a visual check.
downloadurl

In [None]:
import os
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import subprocess
import shutil

######################## Step 0: Basic Settings ############################
# Everything is stored below <current working directory>/<data_type>.
location = os.getcwd()
data_type = "M06A"  # Data type (e.g., M03A, M06A, M05A)

# Main folder plus one sub-folder per processing stage:
# raw downloads, merged daily CSV files, and Excel exports.
savelocation = os.path.join(location, data_type)
savelocation_origin = os.path.join(savelocation, "0_rawdata")
savelocation_merge = os.path.join(savelocation, "1_merge")
savelocation_excel = os.path.join(savelocation, "2_excel")

for _folder in (savelocation, savelocation_origin, savelocation_merge, savelocation_excel):
    os.makedirs(_folder, exist_ok=True)

# Download address: site root followed by the data type.
url = "https://tisvcloud.freeway.gov.tw/history/TDCS/"
urllocation = url + data_type

print(f"Main folder: {savelocation}")
print(f"Raw data folder: {savelocation_origin}")
print(f"Merged data folder: {savelocation_merge}")
print(f"Excel files folder: {savelocation_excel}")
print(f"Download URL: {urllocation}")

######################## Step 1: Time Range ########################
# Inclusive range of days to process, rendered as YYYYMMDD strings.
Start_Time = "2024-08-01"
End_Time = "2024-08-01"
date_range = pd.date_range(start=Start_Time, end=End_Time)
date = [d.strftime("%Y%m%d") for d in date_range]

######################## Step 2: Download M06A and Organized ########################

# Process to excel
def M06A_tohour(df):
    """Aggregate M06A trip records into hourly vehicle counts.

    Only rows whose ``TripEnd`` value is ``'Y'`` are counted. The hour and
    date are taken from the origin detection time (``DetectionTimeO``).

    Args:
        df: DataFrame with exactly seven columns, in the order VehicleType,
            DetectionTimeO, GantryO, DetectionTimeD, GantryD, TripLength,
            TripEnd. The column labels themselves are ignored and the frame
            is left unmodified.

    Returns:
        DataFrame with columns DataDate, DataHour, GantryO, GantryD,
        VehicleType and VehicleCount (number of rows in each group).

    Raises:
        ValueError: If ``df`` does not have exactly seven columns.
    """
    columns = ['VehicleType', 'DetectionTimeO', 'GantryO', 'DetectionTimeD', 'GantryD', 'TripLength', 'TripEnd']
    # set_axis returns a relabelled frame, so the caller's column names are
    # not overwritten as they were by assigning to df.columns.
    trips = df.set_axis(columns, axis=1)
    trips = trips.loc[trips['TripEnd'] == 'Y']

    # Build the derived columns with assign() instead of writing into a
    # filtered slice, which triggered SettingWithCopyWarning.
    origin_time = pd.to_datetime(trips['DetectionTimeO'])
    trips = trips.assign(
        DetectionTimeO=origin_time,
        DataHour=origin_time.dt.hour,
        DataDate=origin_time.dt.date,
    )
    df_output = (
        trips.groupby(['DataDate', 'DataHour', 'GantryO', 'GantryD', 'VehicleType'])
        .size()
        .reset_index(name='VehicleCount')
    )
    return df_output


# Local import: extraction uses the standard library instead of an external
# 7-Zip executable, so the cell no longer depends on a Windows-only binary.
import tarfile

m06a_columns = ['VehicleType', 'DetectionTimeO', 'GantryO', 'DetectionTimeD', 'GantryD', 'TripLength', 'TripEnd']

# For every day: download the archive, extract it, merge all files of the day
# into one CSV, then export the hourly counts to Excel.
for day in date:
    # Data download. Absolute paths are used instead of os.chdir() so the
    # process working directory (read by os.getcwd() elsewhere) is untouched.
    downloadurl = f"{urllocation}/{data_type}_{day}.tar.gz"
    archive_name = f"{data_type}_{day}.tar.gz"
    destfile = os.path.join(savelocation_origin, archive_name)
    print(f"Downloading {archive_name}...")
    with requests.get(downloadurl, stream=True, timeout=60) as response:
        # Fail here on 404/5xx instead of saving an error page as an archive.
        response.raise_for_status()
        with open(destfile, 'wb') as file:
            # Stream in 1 MiB chunks so the archive is never fully in memory.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)

    # Unzip and delete compressed file
    print("Extracting files...")
    # Use the safe "data" extraction filter when this Python version has it.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
    with tarfile.open(destfile, "r:gz") as archive:
        archive.extractall(savelocation_origin, **extract_kwargs)
    os.remove(destfile)

    # Merge hourly data. The loop expects the extracted files under
    # <raw folder>/<data_type>/<day>/<hour folder>/<file>.
    print("Merging hourly data...")
    path = os.path.join(savelocation_origin, data_type, day)
    hour = sorted(os.listdir(path))  # sorted: os.listdir order is unspecified

    frames = []
    for j, hour_name in enumerate(hour):
        path2 = os.path.join(path, hour_name)
        for file_name in sorted(os.listdir(path2)):
            read_path = os.path.join(path2, file_name)
            # Only the first seven fields of each row are read.
            frames.append(
                pd.read_csv(
                    read_path,
                    header=None,
                    names=m06a_columns,
                    usecols=[0, 1, 2, 3, 4, 5, 6],
                )
            )
        print(f"Processing hour {j+1}/{len(hour)}", end='\r')

    # Concatenate once at the end; concatenating inside the loop re-copies
    # all previously merged rows for every file.
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=m06a_columns)

    export_path = os.path.join(savelocation_merge, f"{day}.csv")
    df.to_csv(export_path, index=False)
    print(f"\nSaved merged data to {export_path}")

    # Organize Hourly counts
    print("Organizing vehicle types...")
    df_hour = M06A_tohour(df)

    # Save to Excel
    export_path = os.path.join(savelocation_excel, f"{day}.xlsx")
    df_hour.to_excel(export_path, index=False)
    print(f"Saved Excel file to {export_path}")

    print(f"Completed processing for date: {day}\n")

print("All processing completed.")
 