In [1]:
from ProcessBasic import *

In [75]:
def flatten_directional_traffic_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Flatten a table whose columns are a MultiIndex keyed by direction into long form.

    Every first-level label other than '起' and '迄' is treated as a direction.
    For each direction its sub-table is taken, tagged with a '方向' column,
    given the shared '起' / '迄' columns, and all sub-tables are stacked.

    Parameters:
        df (pd.DataFrame): Table with MultiIndex columns.

    Returns:
        pd.DataFrame: Long-form table whose columns are '方向', '起', '迄'
        followed by the second-level columns; the row index is renumbered.

    Raises:
        ValueError: If the columns are not a MultiIndex, if no '起' or '迄'
            column exists, or if no direction column remains.
    """
    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("輸入的欄位不是 MultiIndex 結構")

    time_labels = ['起', '迄']
    key_cols = ['方向'] + time_labels

    # Locate the full (level-0, level-1) key of each time column without
    # assuming anything about the second level. A missing column is reported
    # explicitly instead of leaking StopIteration out of next().
    time_cols = {}
    for label in time_labels:
        match = next((col for col in df.columns if col[0] == label), None)
        if match is None:
            raise ValueError(f"找不到「{label}」欄位")
        time_cols[label] = match

    # Every remaining first-level label is a direction.
    level0 = df.columns.get_level_values(0).unique()
    direction_cols = [col for col in level0 if col not in time_labels]
    if not direction_cols:
        raise ValueError("找不到任何方向欄位")

    frames = []
    for direction in direction_cols:
        part = df[direction].copy()
        part['方向'] = direction
        part['起'] = df[time_cols['起']]
        part['迄'] = df[time_cols['迄']]
        frames.append(part)

    df_flat = pd.concat(frames, ignore_index=True)

    # Key columns first, measured columns after, in their original order.
    value_cols = [col for col in df_flat.columns if col not in key_cols]
    return df_flat[key_cols + value_cols]

def primily_organized(filelist):
    '''Read the traffic-volume workbooks in ``filelist`` and stack them into one long table.

    For each workbook the '平日' and '假日' sheets are read with a two-row
    header, trimmed to the survey columns, flattened with
    ``flatten_directional_traffic_df`` and tagged with the source file, survey
    point number, road section, lane type, direction and sheet name.
    The original author describes the result as one row per 5-minute interval.

    Args:
        filelist: Iterable of workbook paths. The base name is split on '.':
            the first part is used as the survey point number and the second
            as the road section.
            NOTE(review): a name with only one '.' would yield the file
            extension as the road section - confirm the naming convention.

    Returns:
        pd.DataFrame: All sheets of all files stacked, with a renumbered index.

    Raises:
        ValueError: If a sheet has no column whose second header level is 1.8,
            or (from ``pd.concat``) if ``filelist`` is empty.
    '''
    sheetnamelist = ['平日', '假日']
    outputdf = []

    for file in filelist:
        # The workbook content carries nothing that identifies the survey
        # point, so the identifiers are taken from the file name.
        name_parts = os.path.basename(file).split(".")
        surveynumber = name_parts[0]
        surveyplace = name_parts[1]

        for sheetname in sheetnamelist:
            df = pd.read_excel(file, header=[0,1], sheet_name=sheetname)
            # The first two header cells are blank in the workbook; give them names.
            df = df.rename(columns={'Unnamed: 0_level_0': '起', 'Unnamed: 1_level_0': '迄'})

            # The first column whose second-level header equals 1.8 marks the
            # end of the survey table; only the columns before it are kept.
            drop_from = next((i for i, col in enumerate(df.columns) if col[1] == 1.8), None)
            if drop_from is None:
                raise ValueError(f"{file} [{sheetname}] 找不到第二層欄位名稱為 1.8 的欄位")
            df = df.iloc[:, :drop_from]

            df_flat = flatten_directional_traffic_df(df)
            df_flat['原始資料'] = get_filename_withoutprojectname(file)
            df_flat['調查計畫書點位編號'] = surveynumber
            df_flat['調查路段'] = surveyplace
            # The raw direction label also encodes the lane type; split the two apart.
            df_flat['快慢車道'] = df_flat['方向'].apply(lambda x: '慢車道' if '慢車道' in x else '快車道')
            df_flat['方向'] = df_flat['方向'].str.split('-').str[0]
            df_flat['平假日'] = sheetname
            # Each column is inserted at position 0 in turn, so the last one
            # listed ends up first.
            for column in ['平假日', '方向', '快慢車道', '調查路段', '調查計畫書點位編號']:
                df_flat = move_column(df = df_flat, column_name = column, insert_index=0)

            outputdf.append(df_flat)

    # Renumber the rows so labels are unique across files and sheets.
    output = pd.concat(outputdf, ignore_index=True)

    return output

def getPCU(df):
    """Return a copy of ``df`` with total-volume and PCU columns added.

    Added columns:
        'Volume'   - row sum of '聯結車', '大貨車', '大客車(客運)', '遊覽車',
                     '小型車' and '機車'.
        '<type>PCU' - the count of each vehicle type multiplied by its PCE
                     factor ('大客車(客運)' and '遊覽車' are added together and
                     share the '大客車' factor).
        'PCU'      - row sum of the five per-type PCU columns.

    The '原始資料' column is then repositioned with ``move_column`` using
    insert_index=-1.

    Args:
        df (pd.DataFrame): Table holding the vehicle-count columns listed
            above plus '原始資料'.

    Returns:
        pd.DataFrame: New table; the caller's frame is left unchanged.
    """
    # Multipliers applied to raw counts. '自行車' is defined but not used by
    # any column computed below.
    PCE = {'聯結車':3.0,
        '大貨車':1.8,
        '大客車':1.8,
        '小型車':1.0,
        '機車':0.42,
        '自行車':0.42}

    # Work on a copy so the caller's DataFrame does not silently gain columns.
    df = df.copy()

    df['Volume'] = df[['聯結車', '大貨車', '大客車(客運)', '遊覽車', '小型車', '機車']].sum(axis=1)

    df['聯結車PCU'] = df['聯結車'] * PCE['聯結車']
    df['大貨車PCU'] = df['大貨車'] * PCE['大貨車']
    df['大客車PCU'] = (df['大客車(客運)'] + df['遊覽車']) * PCE['大客車']
    df['小型車PCU'] = df['小型車'] * PCE['小型車']
    df['機車PCU'] = df['機車'] * PCE['機車']

    df['PCU'] = df[['聯結車PCU', '大貨車PCU', '大客車PCU', '小型車PCU', '機車PCU']].sum(axis=1)
    df = move_column(df = df, column_name="原始資料", insert_index=-1)

    return df

def aggregate_by_window(df, window_size, starttimecolumn , endtimecolumn, group_cols = ['檔案', '平假日', '方向'], sum_cols = ['PCU'], drop = False):
    '''Sum ``sum_cols`` over every run of ``window_size`` consecutive rows within each group.

    Rows are sorted by ``group_cols`` and start time, then a window of
    ``window_size`` rows slides one row at a time inside each group. Each
    window yields one output row: the group keys, the start time of its first
    row (as an 'HH:MM:SS' string), the end time of its last row (as a
    ``datetime.time``), and the column sums.

    Args:
        df (pd.DataFrame): Input table. It is not modified.
        window_size (int): Number of consecutive rows per window (>= 1).
        starttimecolumn (str): Column with the interval start time; values
            must render as 'HH:MM:SS' via ``str``. Unparseable values become
            missing.
        endtimecolumn (str): Column with the interval end time, same format.
        group_cols (list): Columns that define the groups.
        sum_cols (list): Columns to sum; defaults to the single 'PCU' column.
        drop (bool): If True, keep only the window with the largest
            ``sum_cols`` values in each group (the earliest window wins a tie
            when a single sum column is used).

    Returns:
        pd.DataFrame: One row per window (or per group when ``drop`` is True)
        with columns ``group_cols``, the two time columns, then ``sum_cols``.
        The columns are present even when no window could be formed.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    '''
    if window_size < 1:
        raise ValueError("window_size 必須大於等於 1")

    group_cols = list(group_cols)
    sum_cols = list(sum_cols)

    # Work on a copy: the time columns are overwritten below and the caller's
    # table must keep its original values.
    df = df.copy()

    # Normalise both time columns to datetime.time so they sort correctly.
    df[starttimecolumn] = pd.to_datetime(df[starttimecolumn].astype(str), format='%H:%M:%S', errors='coerce').dt.time
    df[endtimecolumn] = pd.to_datetime(df[endtimecolumn].astype(str), format='%H:%M:%S', errors='coerce').dt.time
    df = df.sort_values(group_cols + [starttimecolumn]).reset_index(drop=True)

    records = []
    for group_keys, group_df in df.groupby(group_cols):
        # Some pandas versions yield a scalar key for a single grouping column;
        # wrap it so zip() pairs it with the column name instead of iterating it.
        if not isinstance(group_keys, tuple):
            group_keys = (group_keys,)

        group_df = group_df.reset_index(drop=True)
        for i in range(len(group_df) - window_size + 1):
            window = group_df.iloc[i:i + window_size]
            first_start = window[starttimecolumn].iloc[0]
            last_end = window[endtimecolumn].iloc[-1]

            agg_row = dict(zip(group_cols, group_keys))
            # A start time that failed to parse is reported as None rather
            # than crashing on strftime.
            agg_row[starttimecolumn] = first_start.strftime('%H:%M:%S') if pd.notna(first_start) else None
            agg_row[endtimecolumn] = last_end if pd.notna(last_end) else None

            for col in sum_cols:
                agg_row[col] = window[col].sum()

            records.append(agg_row)

    # Fix the column set up front so an empty result still has every column
    # (the drop branch below sorts on them).
    out_cols = list(dict.fromkeys(group_cols + [starttimecolumn, endtimecolumn] + sum_cols))
    df_result = pd.DataFrame(records, columns=out_cols)

    if drop:
        # Largest sum first, then keep the first row of each group.
        # A stable sort keeps the earlier window ahead on equal sums.
        df_result = (
            df_result
            .sort_values(by=sum_cols, ascending=[False] * len(sum_cols), kind='mergesort')
            .drop_duplicates(subset=group_cols)
            .sort_values(by=group_cols)
            .reset_index(drop=True)
        )

    return df_result

def hourlyformat(df):
    """Aggregate the per-interval traffic table into one row per hour.

    The hour is taken from the '起' column (values must render as 'HH:MM:SS'
    via ``str``; unparseable values become missing and their rows fall out of
    the grouping). Rows are grouped by survey point, road section, lane type,
    direction, day type and hour; count and PCU columns are summed and the
    first '原始資料' value of each group is kept.

    Args:
        df (pd.DataFrame): Table containing the grouping columns, '起',
            '原始資料' and every column listed in ``sum_cols`` below.

    Returns:
        pd.DataFrame: Hourly table with the grouping columns first, then the
        summed columns, then '原始資料'. ``df`` itself is not modified.
    """
    group_cols = ['調查計畫書點位編號', '調查路段', '快慢車道', '方向', '平假日', '小時']
    sum_cols = ['聯結車', '大貨車', '大客車(客運)', '遊覽車', '小型車', '機車', '自行車&行人',
                'Volume', '聯結車PCU', '大貨車PCU', '大客車PCU', '小型車PCU', '機車PCU', 'PCU']

    hours = pd.to_datetime(df['起'].astype(str), format='%H:%M:%S', errors='coerce').dt.hour

    # No column reordering is needed before grouping: groupby + reset_index
    # lays the columns out as group keys followed by the aggregated columns.
    dfhour = (
        df
        .assign(**{'小時': hours})
        .groupby(group_cols)
        .agg({**{col: 'sum' for col in sum_cols}, '原始資料': 'first'})
        .reset_index()
    )

    return dfhour

In [76]:
# Step 0: define the input and output folders
datafolder = os.path.abspath(os.path.join(get_projectfolderpath(), 'Technical', '06_交通量調查', '02_原始資料'))
volumefolder = os.path.abspath(os.path.join(datafolder, '01_路段交通量'))
speedfolder = os.path.abspath(os.path.join(datafolder, '02_路段旅行速率'))

initialfolder = create_folder(os.path.join(os.getcwd(), '..', '01_資料初步彙整'))
volume_initialfolder = create_folder(os.path.join(initialfolder, '01_路段交通量'))
speed_initialfolder = create_folder(os.path.join(initialfolder, '02_路段旅行速率'))

# Step 1: consolidate the road-section traffic-volume data
files = findfiles(volumefolder,'xlsx')
df = primily_organized(filelist=files)
df = getPCU(df)
dfhour = hourlyformat(df)

# Highest-PCU window per survey point / lane type / direction / day type.
# NOTE(review): window_size=4 sums 4 consecutive rows; if each row is a
# 5-minute interval (as the sheet names state) that is a 20-minute window -
# confirm this is the intended peak period.
dfpeak = aggregate_by_window(df=df, window_size=4,
                             starttimecolumn='起', endtimecolumn='迄',
                             group_cols=['調查計畫書點位編號', '調查路段', '快慢車道', '方向', '平假日'],
                             drop=True)

volumeoutputpath = os.path.abspath(os.path.join(volume_initialfolder, '路段交通量資料彙整.xlsx'))
with pd.ExcelWriter(volumeoutputpath, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='交通量原始資料(每五分鐘一筆)', index=False)
    dfpeak.to_excel(writer, sheet_name='尖峰時段PCU(每五分鐘一筆)', index=False)
    # The hourly sheet must carry the hourly table, not a second copy of dfpeak.
    dfhour.to_excel(writer, sheet_name='分時交通量', index=False)

for sheetname in get_excel_sheet_names(volumeoutputpath):
    reformat_excel(excel_path=volumeoutputpath, sheetname=sheetname)


In [73]:
# Build the hourly summary with the shared helper instead of repeating its body inline.
dfhour = hourlyformat(df)

In [74]:
# Display the hourly summary table held in `dfhour`.
dfhour

Unnamed: 0,調查計畫書點位編號,調查路段,快慢車道,方向,平假日,小時,聯結車,大貨車,大客車(客運),遊覽車,...,機車,自行車&行人,Volume,聯結車PCU,大貨車PCU,大客車PCU,小型車PCU,機車PCU,PCU,原始資料
0,SL1-1,台61線-後龍觀海大橋,快車道,往東(北),假日,0,32,14,0,0,...,10,0,74,96.0,25.2,0.0,18.0,4.20,143.40,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
1,SL1-1,台61線-後龍觀海大橋,快車道,往東(北),假日,1,27,14,0,0,...,9,0,70,81.0,25.2,0.0,20.0,3.78,129.98,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
2,SL1-1,台61線-後龍觀海大橋,快車道,往東(北),假日,2,33,13,0,0,...,5,0,72,99.0,23.4,0.0,21.0,2.10,145.50,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
3,SL1-1,台61線-後龍觀海大橋,快車道,往東(北),假日,3,21,21,0,0,...,4,0,58,63.0,37.8,0.0,12.0,1.68,114.48,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
4,SL1-1,台61線-後龍觀海大橋,快車道,往東(北),假日,4,33,9,0,0,...,8,0,72,99.0,16.2,0.0,22.0,3.36,140.56,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,SL4-4,台74線-烏溪橋,快車道,往西(南),平日,19,4,22,21,12,...,305,0,3162,12.0,39.6,59.4,2798.0,128.10,3037.10,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
1436,SL4-4,台74線-烏溪橋,快車道,往西(南),平日,20,2,25,14,5,...,345,0,2668,6.0,45.0,34.2,2277.0,144.90,2507.10,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
1437,SL4-4,台74線-烏溪橋,快車道,往西(南),平日,21,4,27,3,5,...,237,0,2259,12.0,48.6,14.4,1983.0,99.54,2157.54,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...
1438,SL4-4,台74線-烏溪橋,快車道,往西(南),平日,22,9,20,6,2,...,126,0,1513,27.0,36.0,14.4,1350.0,52.92,1480.32,~\台北鼎漢(B_6740)\6787臺鐵海線雙軌化綜合規劃\Technical\06_交通...


In [None]:
# Step2 彙整路段旅行速率資料