# 公車分析

分析內容包含：
1. 營運績效指標
2. 路線每月每班次載客
3. 每個路線各班次載客
4. 路線重疊度
5. 起訖可替代率

需要準備資料：
1. 票證資料：須包含所有佔位點的資料
2. 站序資料：需帶有'Direction'欄位
3. 班表資料：需帶有'Direction'、'IsWorkday'欄位
4. 營運月報（個別路線）：做票證放大率佐證用 
5. 營運月報（整體業者）：optional，可做為長期績效業者檢討指標
6. 路線shp



## 基礎設定

包含環境設定，以及指定對應資料夾路徑（input、process、output）

In [1]:
import os
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from tickets_cleaning import tickets_cleaning, date_defined , getDaysCount, getMagnification, tickets_match_shift,  define_quadrant , operation_calcuate

In [2]:
inputfolder_path = os.path.join(os.getcwd(),'..', 'input')
outputfolder_path = os.path.join(os.getcwd(),'..', 'output')
processfolder_path = os.path.join(os.getcwd(),'..', 'process')

# 確保資料夾存在
os.makedirs(inputfolder_path, exist_ok=True)
os.makedirs(outputfolder_path, exist_ok=True)
os.makedirs(processfolder_path, exist_ok=True)


In [3]:
# 加入補班、國定假日等日期  Note:需要手動調整
date_turn_holiday=[20230929] # 補假、國定假日、颱風天
date_turn_workday=[20230923] # 補班
startdate = 20230701
enddate = 20230930
# 計算每月的假日與平日數
dayscount = getDaysCount(startdate, enddate, date_turn_holiday, date_turn_workday)


In [50]:
dayscount.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   DataYearMonth  3 non-null      object
 1   Holiday        3 non-null      int64 
 2   Workday        3 non-null      int64 
 3   Days           3 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 228.0+ bytes


In [59]:
# 定義處理整個 DataFrame 的函數
def add_weekday_counts(dataframe):
    import calendar
    # 將 DataYearMonth 轉換為 datetime 格式
    dataframe["DataYearMonth"] = pd.to_datetime(dataframe["DataYearMonth"], format="%Y%m")
    
    # 定義計算每個星期幾天數的函數
    def calculate_weekday_counts(date):
        year = date.year
        month = date.month
        weekday_counts = {i: 0 for i in range(7)}  # 初始化星期一到星期日的天數為 0

        # 取得該月份的第一天和天數
        _, num_days = calendar.monthrange(year, month)

        # 計算每一天的星期幾
        for day in range(1, num_days + 1):
            weekday = calendar.weekday(year, month, day)
            weekday_counts[weekday] += 1

        return [weekday_counts[i] for i in range(7)]  # 回傳 [Monday, Tuesday, ..., Sunday] 的次數

    # 計算每個月份的星期天數並新增到 DataFrame
    dataframe[["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]] = dataframe["DataYearMonth"].apply(
        calculate_weekday_counts
    ).apply(pd.Series)
    # 將 DataYearMonth 格式化為指定格式 (%Y%m)
    dataframe["DataYearMonth"] = dataframe["DataYearMonth"].dt.strftime("%Y%m")
    
    return dataframe

# 呼叫函數並更新 DataFrame
dayscount_add = add_weekday_counts(dayscount)
dayscount_add = dayscount_add[['DataYearMonth',"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]]
# Melt the DataFrame
dayscount_add = pd.melt(dayscount_add, id_vars=['DataYearMonth'], var_name='Weekday', value_name='Days')


In [60]:
dayscount_add

Unnamed: 0,DataYearMonth,Weekday,Days
0,202307,Monday,5
1,202308,Monday,4
2,202309,Monday,4
3,202307,Tuesday,4
4,202308,Tuesday,5
5,202309,Tuesday,4
6,202307,Wednesday,4
7,202308,Wednesday,5
8,202309,Wednesday,4
9,202307,Thursday,4


In [45]:
dayscount.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   DataYearMonth  3 non-null      object
 1   Holiday        3 non-null      int64 
 2   Workday        3 non-null      int64 
 3   Days           3 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 228.0+ bytes


In [4]:
# 定義票證資料的欄位名稱 Note:需要手動調整
direction_col = 'DIRECTION'
getontime_col ='GETON_DATE'
getofftime_col ='GETOFF_DATE'
getonstop_col ='GETON_STOP_NAME'
getoffstop_col ='GETOFF_STOP_NAME'
getonseq_col ='GETON_STOP_SEQ'
getoffseq_col ='GETOFF_STOP_SEQ'
routename_col = "ROUTE_NAME"

## 資料前處理

1. 票證清洗(去除不可用資料)
2. 票證定義日期欄位 (年月、平假日)
3. 處理票證放大率
4. 營運資料處理（計算象限）

In [5]:
'''進行基礎的票證清洗
1. 找到上車時間 < 下車時間
2. 上車站序 < 下車站序
3. 上下車站名不同'''

# 定義 tickets.csv 的相對路徑
tickets_path = os.path.join(inputfolder_path , 'tickets.csv')
tickets = pd.read_csv(tickets_path)

# 讀取資料並進行清理
tickets, errorstat, correctrate = tickets_cleaning(tickets, 
    getontime=getontime_col, 
    getofftime=getofftime_col, 
    getonstop=getonstop_col, 
    getoffstop=getoffstop_col, 
    getonseq=getonseq_col, 
    getoffseq=getoffseq_col)

# 把清洗過的資料轉存至process
tickets.to_csv(os.path.join(processfolder_path , 'tickets_cleaned.csv'))
# tickets.to_csv(os.path.join(os.path.dirname(__file__), '..', 'process', 'tickets_cleaned.csv'))

# 輸出數據清洗統計
errorstat_path = os.path.join(outputfolder_path , 'ErrorDataStat.txt')
with open(errorstat_path , 'w', encoding='utf-8') as file:
    for key, value in errorstat.items():
        file.write(f"{key}: {value}\n")
# del errorstat
tickets = date_defined(tickets, getontime_columns=getontime_col, date_turn_holiday=date_turn_holiday,\
                       date_turn_workday=date_turn_workday)


  tickets = pd.read_csv(tickets_path)


In [6]:
# tickets = pd.read_csv(os.path.join(processfolder_path , 'tickets_cleaned.csv'))

In [7]:
'''
帶入營運資料進行計算
1. 處理票證資料放大率
2. 透過營運月報指標計算投資報酬四象限
'''

operation = pd.read_csv(os.path.join(inputfolder_path, 'operation.csv'))

# 計算 DataYearMonth 並格式化
operation['DataYearMonth'] = (
    pd.to_datetime((operation['YEAR'] + 1911) * 100 + operation['MONTH'], format='%Y%m')
    .dt.strftime('%Y%m')
)

# 計算營運績效指標
operationcount = operation_calcuate(df = operation , operator_col='OPERATOR_NAME', routename_col='ROUTE_NAME',
                        drivingmiles_col='OPERATING_DRIVING_MILES', shift_col='VEHICLE_SHIFTS',
                        passengers_col='PASSENGERS', passengerkilometers_col='PASSENGER_KILOMETERS',
                        income_col='INCOME', dayscountdf = dayscount)
operationcount = define_quadrant(operationcount, ['DataYearMonth'], xcol='DailyShifts', ycol='PassengersPerKilometers', measure='mean')
operationcount.to_csv(os.path.join(outputfolder_path, 'operation_yearmonth.csv'), index = False, encoding= 'big5')
del operationcount

tickets_magnification = getMagnification(
    tickets=tickets,
    tickets_routename_col=routename_col,
    tickets_yearmonth_col='DataYearMonth',  # 指定票證數據的年月欄位 Note:需要手動調整
    operation=operation,
    operation_routename_col='ROUTE_NAME', # 這個Operation須要手動調整欄位名稱 Note:需要手動調整
    operation_yearmonth_col='DataYearMonth',  # 指定運營數據的年月欄位 Note:需要手動調整
    operation_passengers_col='PASSENGERS' # Note:需要手動調整
)

# 列出所有放大率會有異常的路線
ooc_route_list = list(set(tickets_magnification[tickets_magnification['Magnification'] >= 1.3]['RouteName'].unique()).union(
    set(tickets_magnification[tickets_magnification['Magnification'] <= 0.8]['RouteName'].unique())
))

In [8]:
# 讀取班表資料
shift = pd.read_csv(os.path.join(inputfolder_path, 'shift.csv'))
shift['Shift'] = shift['Shift'].astype(str)
shift['Shift'] = pd.to_datetime(shift['Shift'], format='%H:%M').dt.time
shift = shift.sort_values(['RouteName', 'IsWorkday','Shift', 'Direction'], ascending=[True, True, True, True]).reset_index(drop = True)

# 讀取相關的站序 
seq = pd.read_csv(os.path.join(inputfolder_path,'seq.csv'))

# 具有班表的RouteName_list
shift_routename_list = list(shift['RouteName'].unique()) 
tickets_routename_list = list(tickets[routename_col].unique())
seq_routename_list = list(shift['RouteName'].unique())

# 不在 tickets_routename_list 中但在 shift_routename_list 中的項目
only_in_shift = list(set(shift_routename_list) - set(tickets_routename_list))
# 不在 shift_routename_list 中但在 tickets_routename_list 中的項目
only_in_tickets = list(set(tickets_routename_list) - set(shift_routename_list))
common_routes = list(set(tickets_routename_list) & set(shift_routename_list) & set(seq_routename_list))

# 印出結果
print("缺票證資料:", only_in_shift)
print("缺班表資料:", only_in_tickets)
print("本次可算的路線:", common_routes)

缺票證資料: ['176', '722A', '309C', '5096', '5616A', '5099A', 'T517A', '261', '723A', '266', '263', '262', '5616C', '722', '506', '5010', '265A', '5035', '5006', 'T516B', '5107B', '5106B', 'T516', '5008', '5086C', '5018', '5616B', '5022', '201', '5050A', 'T516A', '5107C', 'T513', 'T512', '609', '5081A', '265B', '5106A', '5096A', 'T517', '608', 'T515', '5022A', '208C', '5019', '181', '723', '724', '5011', 'T512A', '5107A', '5109A', '119']
缺班表資料: ['大園高中上南坎線學生專車', '陽明高中中壢線學生專車', '503', '永豐高中(八德線)', 'F906', '陽明高中南祥線學生專車', '壽山高中大園線學生專車', '217', '大園高中(楊梅線)', '桃園高中南祥線學生專車', '中大壢中(大湳線)', 602, 603, '內壢高中(龍岡線)', 606, 607, '壽山高中竹圍線學生專車', '南崁高中(大園線)', '505', '內壢高中竹圍乙線學生專車', '永豐高中龍潭線學生專車', '內壢高中大溪線學生專車', '602', '內壢高中中豐大坪線學生專車', '5646B', '平鎮高中(華勛工業區線)', '壽山高中竹圍聯邦線學生專車', '502', '東興國中(華勛線)', '內壢高中竹圍甲線學生專車', '內壢高中九龍線學生專車', '桃/壽高中-林口線', '大溪高中(中壢C+E線)', '內壢高中大園甲線學生專車', '5646A', 703, '內壢高中渴望中豐線學生專車', 708, 'F902A', '大溪高遶經羅浮學專車', 'F905', 217, '715A', '南崁高中(青溪中正線)', '大園高中士校內壢線學生專車', '大園高中下南坎線學生專車', '陽明高中大園線學生專車',

### 基本判讀指標：是否繼續往下做

1. 列出本次資料正常資料的佔比
2. 列出本次放大率異常的路線 ( 可以進一步以plotly 圖表檢視長條圖)

In [9]:
print(f'資料可用比例 = {correctrate}%',end=' ')
if correctrate <= 95:
    print('本次取得的資料錯誤率太高，建議重新檢視')
else : 
    print('本次的資料可以使用')

try:
    if len(ooc_route_list) > 0:
        print(f'本次放大率異常路線共{len(ooc_route_list)}條')
        print('票證放大率異常的路線編號', end= ':')
        print(ooc_route_list)
except:
    pass

資料可用比例 = 98.9% 本次的資料可以使用
本次放大率異常路線共209條
票證放大率異常的路線編號:['大園高中上南坎線學生專車', '221', '陽明高中中壢線學生專車', '5651', '709B', '115A', '503', '永豐高中(八德線)', 'F906', '5110A', '706B', '227A', '118', '227', '5098', 'BR', '陽明高中南祥線學生專車', '225', '壽山高中大園線學生專車', '105A', '5104A', '101', '139', '206C', '155', '5617A', '212A', '220', '217', '5087A', '大園高中(楊梅線)', '桃園高中南祥線學生專車', '156', '中大壢中(大湳線)', '105', 602, 603, '內壢高中(龍岡線)', 606, 607, '5623A', '5086A', '5020', '南崁高中(大園線)', '壽山高中竹圍線學生專車', '內壢高中竹圍乙線學生專車', '708', '168A', '永豐高中龍潭線學生專車', '內壢高中大溪線學生專車', '602', '222', '內壢高中中豐大坪線學生專車', '5053A', '5646B', '平鎮高中(華勛工業區線)', '5027A', '5044', '5040A', '5073A', '壽山高中竹圍聯邦線學生專車', '5090', '169', '502', '東興國中(華勛線)', '151', '內壢高中竹圍甲線學生專車', '內壢高中九龍線學生專車', '桃/壽高中-林口線', '5646', '5623', '大溪高中(中壢C+E線)', '內壢高中大園甲線學生專車', '1B', '228', '5646A', '5650', '5099', '601', '5000', 703, '內壢高中渴望中豐線學生專車', '206D', '5065', 708, 'F902A', '大溪高遶經羅浮學專車', '213', '133', '5650A', '137', '307', '115B', '708A', '709', 'F905', 217, '5651A', '5089', '5061', '5617', '

In [10]:
unique_year_months = tickets_magnification["DataYearMonth"].unique()
# 創建篩選器 (Dropdown)
dropdown = widgets.Dropdown(
    options=unique_year_months,
    value=unique_year_months[0],
    description="月份:"
)

# 定義繪圖函數
def plot_barchart(selected_month):
    # 篩選 DataFrame
    filtered_df = tickets_magnification[tickets_magnification["DataYearMonth"] == selected_month]
    
    if filtered_df.empty:
        print(f"No data available for {selected_month}")
        return
    
    # 創建條形圖
    fig = go.Figure()

    # 定義顯示在 hover 上的格式
    hover_text_tickets = [
    f"RouteName: {row['RouteName']}<br>Magnification: {row['Magnification'] * 100:.2f}%<br>Tickets: {row['Tickets']:,}"  # Magnification 顯示為百分比，Tickets 顯示為實際數字
    for _, row in filtered_df.iterrows()
    ]
    hover_text_passengers = [
    f"RouteName: {row['RouteName']}<br>Magnification: {row['Magnification'] * 100:.2f}%<br>Passengers: {row['Passengers']:,}"  # Magnification 顯示為百分比，Passengers 顯示為實際數字
    for _, row in filtered_df.iterrows()
    ]

    # 添加 Tickets 的長條圖
    fig.add_trace(go.Bar(
        x=filtered_df["RouteName"],
        y=filtered_df["Tickets"],
        name="Tickets",
        marker_color="#84C1FF",
        hovertext=hover_text_tickets,  # 顯示格式化過的 hovertext
        hoverinfo="text"  # 只顯示 hovertext 的內容
    ))

    # 添加 Passengers 的長條圖
    fig.add_trace(go.Bar(
        x=filtered_df["RouteName"],
        y=filtered_df["Passengers"],  # 更新欄位名稱為 Passengers
        name="Passengers",
        marker_color="#FF8000",
        hovertext=hover_text_passengers,  # 顯示格式化過的 hovertext
        hoverinfo="text"  # 只顯示 hovertext 的內容
    ))

    # 設定標題與軸標籤
    fig.update_layout(
        title=f"Tickets and Passengers for {selected_month}",
        xaxis_title="路線編號",
        yaxis_title="人次",
        barmode="group",  # 並列顯示長條圖
        xaxis_tickangle=-90,
        template="plotly_white"  # 使用白色背景的模板
    )

    # 顯示圖表
    fig.show()

# 綁定事件到篩選器
dropdown.observe(lambda change: plot_barchart(change.new), names="value")

# 初始顯示
display(dropdown)
plot_barchart(dropdown.value)


Dropdown(description='月份:', options=('202307', '202308', '202309'), value='202307')

## 資料運算

逐條路線執行
1. 比對班次：依據上車時間比對發車班次（適用班次兼具較少的班次）
2. 班表 - 站序配對
3. 計算**上下車人次**及**站間量**

In [33]:
outputfolder_unmagnification_path = os.path.join(os.getcwd(),'..', 'output','未放大過的每班次載客')
os.makedirs(outputfolder_unmagnification_path, exist_ok=True)

ticketswithshift = []
addlist = []

# 迴圈計算
for route in common_routes[:]: # 以前三條路線進行
    print(route, end = " ")
    routefilename = os.path.join(outputfolder_unmagnification_path, f'{route}.xlsx')
    
    with pd.ExcelWriter(routefilename) as writer:
        # 篩選特定路線的資料
        seq_select = seq[seq['RouteName'] == route].reset_index(drop=True)
        shift_select = shift[shift['RouteName'] == route].reset_index(drop=True)
        
        # 篩選符合月份的票證資料
        tickets_select = tickets[tickets[routename_col] == route].sort_values(getontime_col).reset_index(drop=True)
        yearmonthlist = list(tickets['DataYearMonth'].unique())
        
        onandoff_MAX_list = []

        # 針對每個月份進行處理
        for yearmonth in yearmonthlist:
            sheetname = str(yearmonth)
            
            # 呼叫平假日天數
            holidaycount = dayscount[dayscount['DataYearMonth'] == yearmonth]['Holiday'].values[0]
            Workdaycount = dayscount[dayscount['DataYearMonth'] == yearmonth]['Workday'].values[0]
            shift_select['Days'] = shift_select['IsWorkday'].apply(lambda x: Workdaycount if x == 1 else holidaycount)
            
            # 合併班表和路線序列
            seqwithshift = pd.merge(shift_select, seq_select, on=['RouteName', 'Direction'])

            # 篩選該月份的票證資料
            tickets_select_month = tickets_select[tickets_select['DataYearMonth'] == yearmonth]
            tickets_select_month = tickets_select_month[tickets_select_month[direction_col].isin(list(shift_select['Direction'].unique()))].reset_index(drop=True)
            
            # 比對班表
            tickets_select_month = tickets_match_shift(
                tickets=tickets_select_month,
                shifts=shift_select,
                routename_col=routename_col,
                getontime_col=getontime_col,
                direction_col=direction_col
            )
            ticketswithshift.append(tickets_select_month)
            
            # 計算每站上下車數據
            onandoff = pd.merge(
                seqwithshift,
                tickets_select_month.groupby([routename_col, direction_col, 'IsWorkday', getonseq_col, 'Matched_Shift'])
                .size().reset_index(name='OnCount')
                .rename(columns={routename_col: 'RouteName', direction_col: 'Direction', getonseq_col: 'Seq', 'Matched_Shift': 'Shift'}),
                on=['RouteName', 'Direction', 'IsWorkday', 'Seq', 'Shift'], how='left'
            )
            onandoff = pd.merge(
                onandoff,
                tickets_select_month.groupby([routename_col, direction_col, 'IsWorkday', getoffseq_col, 'Matched_Shift'])
                .size().reset_index(name='OffCount')
                .rename(columns={routename_col: 'RouteName', direction_col: 'Direction', getoffseq_col: 'Seq', 'Matched_Shift': 'Shift'}),
                on=['RouteName', 'Direction', 'IsWorkday', 'Seq', 'Shift'], how='left'
            )
            onandoff[['OnCount', 'OffCount']] = onandoff[['OnCount', 'OffCount']].fillna(0).astype(int)
            
            # 計算站間累計人數
            for i in range(len(onandoff)):
                if onandoff.loc[i, 'Seq'] == 1:
                    onandoff.loc[i, 'OnBus'] = onandoff.loc[i, 'OnCount'] - onandoff.loc[i, 'OffCount']
                else:
                    onandoff.loc[i, 'OnBus'] = onandoff.loc[i - 1, 'OnBus'] + onandoff.loc[i, 'OnCount'] - onandoff.loc[i, 'OffCount']
            onandoff.loc[onandoff['OnBus'] < 0, 'OnBus'] = 0
            
            # 平均上下車數
            onandoff['OnAVG'] = round(onandoff['OnCount'] / onandoff['Days'], 1)
            onandoff['OffAVG'] = round(onandoff['OffCount'] / onandoff['Days'], 1)
            
            # 平均站間人數
            for i in range(len(onandoff)):
                if onandoff.loc[i, 'Seq'] == 1:
                    onandoff.loc[i, 'OnBusAvg'] = onandoff.loc[i, 'OnAVG'] - onandoff.loc[i, 'OffAVG']
                else:
                    onandoff.loc[i, 'OnBusAvg'] = onandoff.loc[i - 1, 'OnBusAvg'] + onandoff.loc[i, 'OnAVG'] - onandoff.loc[i, 'OffAVG']
            onandoff.loc[onandoff['OnBusAvg'] < 0, 'OnBusAvg'] = 0
            onandoff['DataYearMonth'] = yearmonth
            onandoff =  onandoff[[ 'DataYearMonth'] + [col for col in  onandoff.columns if col != 'DataYearMonth']]
            addlist.append(onandoff)
            
            # 寫入該月份的數據
            onandoff.to_excel(writer, sheet_name=sheetname, index=False)
            
            # 計算每月最大站間量
            # onandoff_MAX = onandoff.groupby(['RouteName', 'Direction']).agg(OnBusMax=('OnBusAvg', 'max')).reset_index()
            onandoff_MAX = (onandoff.loc[onandoff.groupby(['RouteName', 'Direction'])['OnBusAvg'].idxmax()]
                            [['RouteName', 'Direction', 'Shift', 'OnBusAvg']]
                            .rename(columns={'OnBusAvg': 'OnBusMax'})
                            .reset_index(drop=True)
                            )
            onandoff_MAX['DataYearMonth'] = yearmonth
            onandoff_MAX = onandoff_MAX[[ 'DataYearMonth'] + [col for col in onandoff_MAX.columns if col != 'DataYearMonth']]
            onandoff_MAX_list.append(onandoff_MAX)
            
        
        # 整理每月最大站間量並寫入
        onandoff_MAX_list = pd.concat(onandoff_MAX_list)
        onandoff_MAX_list.to_excel(writer, sheet_name="每月最大站間量", index=False)


        print('done')


221 done
112S done
5651 done
709B done
115A done
5031 done
236 done
5110A done
5093 done
706B done
227A done
118 done
208 done
227 done
5098 done
BR done
225 done
5021 done
5094 done
302A done
5042 done
170 done
105A done
5043 done
5104A done
101 done
139 done
5112 done
206C done
155 done
5049 done
5607 done
5617A done
212A done
220 done
712 done
5087A done
5053 done
709A done
156 done
105 done
5104 done
5038 done
5623A done
5086A done
5020 done
113 done
168A done
708 done
1A done
5078 done
5028 done
222 done
171 done
5016 done
103 done
5059 done
5053A done
168 done
5027A done
5648 done
5044 done
5040A done
5050 done
232 done
5073A done
5090 done
169 done
151 done
501 done
716 done
5646 done
5623 done
1B done
228 done
251 done
5650 done
5654 done
5099 done
157 done
206 done
710A done
122 done
601 done
5000 done
1 done
206D done
5065 done
213 done
133 done
231 done
167A done
137 done
5650A done
307 done
115B done
708A done
709 done
5651A done
5089 done
5624 done
5061 done
5617 done
208A

In [34]:
ticketswithshift = pd.concat(ticketswithshift)
ticketswithshift.to_csv(os.path.join(processfolder_path, 'TicketsWithShifts.csv'), index = False)

allstop = pd.concat(addlist)
allstop['OD'] = allstop['Seq'].astype(str) + '-' + (allstop['Seq']+1).astype(str)
allstop.to_csv(os.path.join(os.getcwd(),'..', 'output','Busflow.csv'), index=False)

In [39]:
ticketswithshift = ticketswithshift.reset_index(drop=True)

In [72]:
ticketswithshift['Weekday'] = ticketswithshift[getontime_col].dt.day_name()
ticketswithshift_weekdayscount = ticketswithshift.groupby(['DataYearMonth', routename_col, direction_col, 'Matched_Shift', 'Weekday']).size().reset_index(name = 'Passengers').rename(columns = {'Matched_Shift':'Shift'})
ticketswithshift_weekdayscount = pd.merge(ticketswithshift_weekdayscount, dayscount_add, on = ['DataYearMonth', 'Weekday'], how = 'left')
ticketswithshift_weekdayscount['PassengersPerDay'] = round(ticketswithshift_weekdayscount['Passengers'] / ticketswithshift_weekdayscount['Days'], 2)
ticketswithshift_weekdayscount.to_csv(os.path.join(outputfolder_path, 'ShiftWeeklyAvgPassengers.csv'), index = False)

### 轉換為每一個站間的人數多寡

In [None]:
# 產生OD帶寬
def get_OD_line_shp(df, o_col, d_col, o_x_col, o_y_col, d_x_col, d_y_col, count_col, how = 'countd', date_col ,combine = True , span = 100):
    '''
    Parameters:
    df (dataframe) : 要計算的表格，例如信令資料、票證資料統計圖表
    o_col (str) : 起點的名稱 / 站序 / 可判別的欄位
    d_col (str) : 迄點的名稱 / 站序 / 可判別的欄位
    o_x_col (str) : 起點的經度 (wgs84)
    o_y_col (str) : 起點的緯度 (wgs84)
    d_x_col (str) : 迄點的經度 (wgs84)
    d_y_col (str) : 迄點的緯度 (wgs84)
    count_col (str) : 需要統計依據的量值
    combine (Boolean) : True為去定義是否要把同一對起訖端點的量合併成同一條統計量，False則僅顯示OD
    how (str) : 填入'sum'或'mean'
    span (int) : 級距 (default 設為100)

    OthersObject:
    countdf (dataframe):統計出來的OD表 (尚未轉成geodataframe)
    countgdf(geodataframe) : OD帶寬 的 geodataframe 
    '''

    # 1. 不合併雙向OD
    if combine == False : 
        if how != 'countd':
            countdf = df.groupby([o_col, d_col]).agg({o_x_col:'mean', o_y_col:'mean',  d_x_col:'mean', d_y_col:'mean', count_col : how})
        elif how == 'countd':
            countdf = df.groupby([o_col, d_col]).agg({o_x_col:'mean', o_y_col:'mean',  d_x_col:'mean', d_y_col:'mean', count_col : 'sum', date_col:'unique'})
            countdf[count_col] = countdf[count_col] / countdf[date_col]
            countdf = countdf.drop(columns = date_col)
    else :
        if how != 'countd':
            


    return countdf

