# 站間量分析

需要準備資料：
1. 票證資料：須包含所有佔位點的資料
2. 站序資料：需帶有'Direction'欄位
3. 班表資料：需帶有'Direction'、'IsWorkday'欄位
4. 營運月報 (optional)：做票證放大率佐證用 

## 基礎設定

包含環境設定，以及指定對應資料夾路徑（input、process、output）

In [1]:
import os
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from tickets_cleaning import tickets_cleaning, date_defined , getDaysCount, getMagnification

In [2]:
inputfolder_path = os.path.join(os.getcwd(),'..', 'input')
outputfolder_path = os.path.join(os.getcwd(),'..', 'output')
processfolder_path = os.path.join(os.getcwd(),'..', 'process')

# 確保資料夾存在
os.makedirs(inputfolder_path, exist_ok=True)
os.makedirs(outputfolder_path, exist_ok=True)
os.makedirs(processfolder_path, exist_ok=True)


In [3]:
date_turn_holiday=[20230929] # 補假、國定假日、颱風天
date_turn_workday=[20230923] # 補班
startdate = 20230701
enddate = 20230930
# 計算每月的假日與平日數
dayscount = getDaysCount(startdate, enddate, date_turn_holiday, date_turn_workday)


## 資料前處理

1. 票證清洗(去除不可用資料)
2. 票證定義日期欄位 (年月、平假日)
3. 處理票證放大率

In [None]:
# '''進行基礎的票證清洗
# 1. 找到上車時間 < 下車時間
# 2. 上車站序 < 下車站序
# 3. 上下車站名不同'''

# # 定義 tickets.csv 的相對路徑
# tickets_path = os.path.join(inputfolder_path , 'tickets.csv')
# tickets = pd.read_csv(tickets_path)

# # 讀取資料並進行清理
# tickets, errorstat, correctrate = tickets_cleaning(tickets, 
#     getontime='GETON_DATE', 
#     getofftime='GETOFF_DATE', 
#     getonstop='GETON_STOP_NAME', 
#     getoffstop='GETOFF_STOP_NAME', 
#     getonseq='GETON_STOP_SEQ', 
#     getoffseq='GETOFF_STOP_SEQ')

# # 把清洗過的資料轉存至process
# tickets.to_csv(os.path.join(processfolder_path , 'tickets_cleaned.csv'))
# # tickets.to_csv(os.path.join(os.path.dirname(__file__), '..', 'process', 'tickets_cleaned.csv'))

# # 輸出數據清洗統計
# errorstat_path = os.path.join(outputfolder_path , 'ErrorDataStat.txt')
# with open(errorstat_path , 'w', encoding='utf-8') as file:
#     for key, value in errorstat.items():
#         file.write(f"{key}: {value}\n")
# # del errorstat
# tickets = date_defined(tickets, getontime_columns='GETON_DATE', date_turn_holiday=date_turn_holiday,\
#                        date_turn_workday=date_turn_workday)

# # 針對這次的資料去做調整
# tickets = tickets.rename(columns = {'ROUTE_NAME':'RouteName'})


  tickets = pd.read_csv(tickets_path)


In [None]:
tickets = pd.read_csv(os.path.join(processfolder_path , 'tickets_cleaned.csv'))

In [11]:
'''處理票證資料放大率'''

operation = pd.read_csv(os.path.join(inputfolder_path, 'operation.csv'))

# 計算 DataYearMonth 並格式化
operation['DataYearMonth'] = (
    pd.to_datetime((operation['YEAR'] + 1911) * 100 + operation['MONTH'], format='%Y%m')
    .dt.strftime('%Y%m')
)

tickets_magnification = getMagnification(
    tickets=tickets,
    tickets_routename_col='RouteName',
    tickets_yearmonth_col='DataYearMonth',  # 指定票證數據的年月欄位
    operation=operation,
    operation_routename_col='ROUTE_NAME',
    operation_yearmonth_col='DataYearMonth',  # 指定運營數據的年月欄位
    operation_passengers_col='PASSENGERS'
)

# 列出所有放大率會有異常的路線
ooc_route_list = list(set(tickets_magnification[tickets_magnification['Magnification'] >= 1.3]['RouteName'].unique()).union(
    set(tickets_magnification[tickets_magnification['Magnification'] <= 0.8]['RouteName'].unique())
))

In [12]:
# # 讀取班表資料
# shift = pd.read_excel(os.path.join(inputfolder_path, 'shift.xlsx'))
# shift.columns = ['RouteName', 'Direction', 'Shift', 'IsWorkday']
# shift['IsWorkday'] = shift['IsWorkday'].replace({'假日': '0', '平日': '1'})
# shift['Shift'] = shift['Shift'].astype(str)
# shift['Shift'] = pd.to_datetime(shift['Shift'], format='%H:%M').dt.time
# shift = shift.sort_values(['RouteName', 'IsWorkday','Shift', 'Direction'], ascending=[True, True, True, True])

# 讀取班表資料
shift = pd.read_csv(os.path.join(inputfolder_path, 'shift.csv'))
shift['Shift'] = shift['Shift'].astype(str)
shift['Shift'] = pd.to_datetime(shift['Shift'], format='%H:%M').dt.time
shift = shift.sort_values(['RouteName', 'IsWorkday','Shift', 'Direction'], ascending=[True, True, True, True]).reset_index(drop = True)

# 讀取相關的站序 
seq = pd.read_csv(os.path.join(inputfolder_path,'seq.csv'))

# 具有班表的RouteName_list
shift_routename_list = list(shift['RouteName'].unique()) 
tickets_routename_list = list(tickets['RouteName'].unique())
seq_routename_list = list(shift['RouteName'].unique())

# 不在 tickets_routename_list 中但在 shift_routename_list 中的項目
only_in_shift = list(set(shift_routename_list) - set(tickets_routename_list))
# 不在 shift_routename_list 中但在 tickets_routename_list 中的項目
only_in_tickets = list(set(tickets_routename_list) - set(shift_routename_list))
common_routes = list(set(tickets_routename_list) & set(shift_routename_list) & set(seq_routename_list))

# 印出結果
print("缺票證資料:", only_in_shift)
print("缺班表資料:", only_in_tickets)
print("本次可算的路線:", common_routes)

缺票證資料: ['176', '5035', '5022A', '5086C', '5008', '5616A', 'T517A', '309C', 'T516', '608', '119', '5018', '5022', '266', '722', '208C', '5106B', '262', '5006', '5019', '181', '5107B', '5099A', '5010', '724', 'T512A', '265B', '5616B', '5050A', 'T516A', '5616C', '723', 'T517', '201', '261', 'T516B', '723A', 'T515', '5096', '5011', '506', '609', '5109A', '263', 'T513', '265A', '5107A', 'T512', '5096A', '5106A', '722A', '5107C', '5081A']
缺班表資料: ['桃園高中大園線學生專車', '內壢高中渴望金陵線學生專車', 'F902', '715A', 'F902A', '南崁高中(八德大有線)', '壽山高中竹圍線學生專車', '桃園高中崎海線學生專車', '振聲高中大園線學生專車', '桃園高中南祥線學生專車', '武陵高中大興莊敬線學生專車', '南崁高中-中正甲線', '大溪高中蝙蝠洞線學生專車', '503', '內壢高中(新屋線)', 602, 603, 606, 607, '壽山高中大湳線學生專車', '壽山高中三鶯線學生專車', '內壢高中(東社線)', '內壢高中中豐大坪線學生專車', '大園高中上南坎線學生專車', 'F901', '武陵高中東興下課', '永豐高中平鎮線學生專車', 'F901A', '5086B', '內壢高中大園甲線學生專車', '桃園高中-大溪線', '平鎮高中(華勛工業區線)', '大溪高中龍潭Ｂ線學生專車', '陽明高中南祥線學生專車', '南崁高中(大園線)', '710B', '內壢高中九龍線學生專車', 703, 708, '大溪高中桃園Ａ線學生專車', '602', '大園高中(楊梅線)', '5646A', '502', 217, '大溪高中(中壢C+E線)', '永豐高中八德線學生專車',

### 基本判讀指標：是否繼續往下做

1. 列出本次資料正常資料的佔比
2. 列出本次放大率異常的路線 ( 可以進一步以plotly 圖表檢視長條圖)

In [13]:
print(f'資料可用比例 = {correctrate}%',end=' ')
if correctrate <= 95:
    print('本次取得的資料錯誤率太高，建議重新檢視')
else : 
    print('本次的資料可以使用')

try:
    if len(ooc_route_list) > 0:
        print(f'本次放大率異常路線共{len(ooc_route_list)}條')
        print('票證放大率異常的路線編號', end= ':')
        print(ooc_route_list)
except:
    pass

資料可用比例 = 98.9% 本次的資料可以使用
本次放大率異常路線共327條
票證放大率異常的路線編號:['桃園高中大園線學生專車', '內壢高中渴望金陵線學生專車', '139', '252A', '715A', '5078', '5099', '229', '232A', '南崁高中(八德大有線)', '111', '172', '5055', '173', '711', '振聲高中大園線學生專車', '桃園高中崎海線學生專車', '5060', '5014A', '桃園高中南祥線學生專車', '251', '5041', '5623', '武陵高中大興莊敬線學生專車', '137', '503', '706B', '225A', '壽山高中三鶯線學生專車', '內壢高中(東社線)', '內壢高中中豐大坪線學生專車', '大園高中上南坎線學生專車', '708A', '603', '5674', '112S', '1B', '206E', '5650', '武陵高中東興下課', '永豐高中平鎮線學生專車', '5118', '703', '5028', '5086B', '內壢高中大園甲線學生專車', '208A', '5624', '252', '桃園高中-大溪線', '5649', '152', '168A', '平鎮高中(華勛工業區線)', '5027', '706A', '701', '大溪高中龍潭Ｂ線學生專車', '105B', '陽明高中南祥線學生專車', '206', '5065', '157', '5031', '大溪高中桃園Ａ線學生專車', '5097', '5110', '大園高中(楊梅線)', '5016', '113', '167', '502', 217, '大溪高中(中壢C+E線)', '501', '永豐高中八德線學生專車', '5038', '223', '5646B', '227', '武陵高中(龍岡線)', '5056', '5086A', '607', '117A', 'F906', 'F907', '132', '208', '606', '706', '5617', '5112', '5083', '5654', '大溪高遶經羅浮學專車', 'GR', '5086', '5645', '5653', '5085', '

In [14]:
# unique_year_months = tickets_magnification["DataYearMonth"].unique()
# # 創建篩選器 (Dropdown)
# dropdown = widgets.Dropdown(
#     options=unique_year_months,
#     value=unique_year_months[0],
#     description="月份:"
# )

# # 定義繪圖函數
# def plot_barchart(selected_month):
#     # 篩選 DataFrame
#     filtered_df = tickets_magnification[tickets_magnification["DataYearMonth"] == selected_month]
    
#     if filtered_df.empty:
#         print(f"No data available for {selected_month}")
#         return
    
#     # 創建條形圖
#     fig = go.Figure()

#     # 定義顯示在 hover 上的格式
#     hover_text_tickets = [
#     f"RouteName: {row['RouteName']}<br>Magnification: {row['Magnification'] * 100:.2f}%<br>Tickets: {row['Tickets']:,}"  # Magnification 顯示為百分比，Tickets 顯示為實際數字
#     for _, row in filtered_df.iterrows()
#     ]
#     hover_text_passengers = [
#     f"RouteName: {row['RouteName']}<br>Magnification: {row['Magnification'] * 100:.2f}%<br>Passengers: {row['Passengers']:,}"  # Magnification 顯示為百分比，Passengers 顯示為實際數字
#     for _, row in filtered_df.iterrows()
#     ]

#     # 添加 Tickets 的長條圖
#     fig.add_trace(go.Bar(
#         x=filtered_df["RouteName"],
#         y=filtered_df["Tickets"],
#         name="Tickets",
#         marker_color="#84C1FF",
#         hovertext=hover_text_tickets,  # 顯示格式化過的 hovertext
#         hoverinfo="text"  # 只顯示 hovertext 的內容
#     ))

#     # 添加 Passengers 的長條圖
#     fig.add_trace(go.Bar(
#         x=filtered_df["RouteName"],
#         y=filtered_df["Passengers"],  # 更新欄位名稱為 Passengers
#         name="Passengers",
#         marker_color="#FF8000",
#         hovertext=hover_text_passengers,  # 顯示格式化過的 hovertext
#         hoverinfo="text"  # 只顯示 hovertext 的內容
#     ))

#     # 設定標題與軸標籤
#     fig.update_layout(
#         title=f"Tickets and Passengers for {selected_month}",
#         xaxis_title="路線編號",
#         yaxis_title="人次",
#         barmode="group",  # 並列顯示長條圖
#         xaxis_tickangle=-90,
#         template="plotly_white"  # 使用白色背景的模板
#     )

#     # 顯示圖表
#     fig.show()

# # 綁定事件到篩選器
# dropdown.observe(lambda change: plot_barchart(change.new), names="value")

# # 初始顯示
# display(dropdown)
# plot_barchart(dropdown.value)


## 資料運算

1. 把班表資料黏上

In [84]:
route = common_routes[1]
print(route)

5081


In [16]:
# 要開始用for 迴圈套入route 進行計算
# route 以 common_routes 可以的進行 

In [85]:
getontime_col = 'GETON_DATE'
direction_col = 'DIRECTION'

seq_select = seq[seq['RouteName'] == route].reset_index(drop = True)
shift_select = shift[shift['RouteName'] == route].reset_index(drop = True)

# 先挑選特定路線
tickets_select = tickets[tickets['RouteName'] == route].sort_values(getontime_col).reset_index(drop = True)

# 根據這個路線 挑選他有提供的月份進行計算
yearmonthlist = list(tickets['DataYearMonth'].unique())
yearmonth = yearmonthlist[0]
tickets_select_month = tickets_select[tickets_select['DataYearMonth'] == yearmonth].sort_values(getontime_col).reset_index(drop = True)
tickets_select_month = tickets_select_month[tickets_select_month['DIRECTION'].isin(list(shift_select['Direction'].unique()))].sort_values(getontime_col).reset_index(drop = True)
# tickets_select_month = tickets_select_month.rename(columns= {direction_col:'Direction'})

以下嘗試

In [99]:
def tickets_match_shift(tickets, shifts, routename_col = "ROUTE_NAME" ,getontime_col='GETON_DATE', direction_col='DIRECTION'):
    """
    將刷卡資料匹配到最接近的班次。

    參數:
        tickets (DataFrame): 包含刷卡資料的 DataFrame。
        shifts (DataFrame): 包含班次資料的 DataFrame。
        getontime_col (str): 表示刷卡時間的欄位名稱，默認為 'GETON_DATE'。
        direction_col (str): 表示方向的欄位名稱，默認為 'DIRECTION'。

    返回:
        DataFrame: 加入 "Matched_Shift" 欄位的刷卡資料 DataFrame。
    """
    import pandas as pd
    # 轉換時間欄位格式
    tickets[getontime_col] = pd.to_datetime(tickets[getontime_col])
    shifts["Shift"] = pd.to_datetime(shifts["Shift"], format="%H:%M:%S").dt.time

    # 定義函數來匹配班次
    def match_shift(row, available_shifts):
        geton_time = row[getontime_col].time()
        available_shifts = sorted(available_shifts)
        for i in range(len(available_shifts)):
            if geton_time < available_shifts[i]:  # 比最早的 Shift 早
                return available_shifts[max(0, i - 1)]  # 返回上一個班次（或第一個班次）
        return available_shifts[-1]  # 晚於所有班次，返回最後一個班次

    # 匹配班次的主要邏輯
    matched_shifts = []
    for _, ticket in tickets.iterrows():
        # 找到對應的班次
        route_shifts = shifts[
            (shifts["RouteName"] == ticket[routename_col]) &
            (shifts["IsWorkday"] == ticket["IsWorkday"]) &
            (shifts["Direction"] == ticket[direction_col])
        ]["Shift"].tolist()

        # 如果有班次，匹配
        if route_shifts:
            matched_shift = match_shift(ticket, route_shifts)
            matched_shifts.append(matched_shift)
        else:
            matched_shifts.append(None)  # 如果沒有匹配的班次

    # 新增匹配結果欄位
    tickets["Matched_Shift"] = matched_shifts
    return tickets

In [100]:
tickets_select_month = tickets_match_shift(tickets_select_month.rename(columns={'RouteName':'ROUTE_NAME'}), shift_select)
tickets_select_month

Unnamed: 0,OPERATOR_NAME,ROUTE_TYPE,OPERATOR_ID,CAR_NUMBER,ROUTE_ID,ROUTE_NAME,CARD_OPERATOR,CARDID_DEC,CARDID_HEX,CITIZENCARD,...,GETOFF_LON,GETOFF_COUNTY,GETOFF_TOWN,GETOFF_VILL,SHOULD_DEDUCTED,CONSUMPTION_DEDUCTED,SUBTOTAL,IsWorkday,DataYearMonth,Matched_Shift
0,桃園客運,市區公車,1,KKA-3761,5081,5081,ECC,1407547361,53E577E1,0,...,121.217160,桃園市,中壢區,光明里,18,,0.0,1,202309,07:50:00
1,桃園客運,市區公車,1,KKA-3761,5081,5081,ECC,1300217889,4D7FC021,0,...,121.217160,桃園市,中壢區,光明里,35,,35.0,1,202309,07:50:00
2,桃園客運,市區公車,1,KKA-3761,5081,5081,ECC,1056759829,3EFCE015,0,...,121.213886,桃園市,中壢區,青埔里,18,,18.0,1,202309,07:50:00
3,桃園客運,市區公車,1,KKA-3761,5081,5081,ECC,1462116001,57261EA1,0,...,121.216688,桃園市,中壢區,光明里,16,,0.0,1,202309,07:50:00
4,桃園客運,市區公車,1,KKA-3761,5081,5081,ECC,814250497,30887A01,0,...,121.224006,桃園市,中壢區,石頭里,16,,0.0,1,202309,07:50:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1408,桃園客運,市區公車,1,FAD-507,5081,5081,ECC,1181051397,46656A05,0,...,121.224006,桃園市,中壢區,石頭里,31,,0.0,0,202309,15:20:00
1409,桃園客運,市區公車,1,FAD-507,5081,5081,ECC,1167103217,459094F1,0,...,121.213886,桃園市,中壢區,青埔里,18,,18.0,0,202309,15:20:00
1410,桃園客運,市區公車,1,FAD-507,5081,5081,ECC,1135654645,43B0B6F5,0,...,121.224006,桃園市,中壢區,石頭里,18,,18.0,0,202309,15:20:00
1411,桃園客運,市區公車,1,FAD-507,5081,5081,ECC,340189653,1446E1D5,0,...,121.216688,桃園市,中壢區,光明里,18,,18.0,0,202309,12:35:00


In [86]:
getontime_col = 'GETON_DATE'
direction_col = 'DIRECTION'

# 轉換時間欄位格式
tickets_select_month[getontime_col] = pd.to_datetime(tickets_select_month[getontime_col])
shift_select["Shift"] = pd.to_datetime(shift_select["Shift"], format="%H:%M:%S").dt.time

# 定義函數來匹配班次
def match_shift(row, shifts):
    geton_time = row[getontime_col].time()
    shifts = sorted(shifts)
    for i in range(len(shifts)):
        if geton_time < shifts[i]:  # 比最早的 Shift 早
            return shifts[max(0, i - 1)]  # 返回上一個班次（或第一個班次）
    return shifts[-1]  # 晚於所有班次，返回最後一個班次

# 匹配班次函數
def assign_matched_shifts(tickets_df, shifts_df):
    matched_shifts = []
    for _, ticket in tickets_df.iterrows():
        # 找到對應的班次
        route_shifts = shifts_df[
            (shifts_df["RouteName"] == ticket["RouteName"]) &
            (shifts_df["IsWorkday"] == ticket["IsWorkday"]) &
            (shifts_df["Direction"] == ticket[direction_col])
        ]["Shift"].tolist()

        # 如果有班次，匹配
        if route_shifts:
            matched_shift = match_shift(ticket, route_shifts)
            matched_shifts.append(matched_shift)
        else:
            matched_shifts.append(None)  # 如果沒有匹配的班次

    # 新增匹配結果欄位
    tickets_df["Matched_Shift"] = matched_shifts
    return tickets_df

# 執行匹配
tickets_select_month = assign_matched_shifts(tickets_select_month, shift_select)


In [89]:
shift_select

Unnamed: 0,RouteName,IsWorkday,Direction,Shift,IsCycle
0,5081,0,1,07:50:00,0
1,5081,0,0,10:35:00,0
2,5081,0,0,12:35:00,0
3,5081,0,1,15:20:00,0
4,5081,1,1,07:50:00,0
5,5081,1,0,10:30:00,0
6,5081,1,0,12:35:00,0
7,5081,1,1,15:20:00,0


In [93]:
tickets_select_month[ (tickets_select_month['IsWorkday'] == 0) & (tickets_select_month[direction_col] == 1) ][['RouteName','GETON_DATE','IsWorkday',direction_col,'GETON_STOP_SEQ','GETON_STOP_NAME', 'GETOFF_STOP_SEQ','GETOFF_STOP_NAME','Matched_Shift']].head(30)

Unnamed: 0,RouteName,GETON_DATE,IsWorkday,DIRECTION,GETON_STOP_SEQ,GETON_STOP_NAME,GETOFF_STOP_SEQ,GETOFF_STOP_NAME,Matched_Shift
52,5081,2023-09-02 07:49:04,0,1,1.0,大園車站,42.0,中壢總站,07:50:00
53,5081,2023-09-02 07:58:16,0,1,11.0,照鏡,42.0,中壢總站,07:50:00
54,5081,2023-09-02 08:01:17,0,1,14.0,下洽溪,37.0,柑園,07:50:00
55,5081,2023-09-02 08:01:20,0,1,14.0,下洽溪,37.0,柑園,07:50:00
56,5081,2023-09-02 08:06:33,0,1,21.0,青埔國中,42.0,中壢總站,07:50:00
57,5081,2023-09-02 08:07:10,0,1,21.0,青埔國中,22.0,高鐵桃園站,07:50:00
58,5081,2023-09-02 08:09:15,0,1,22.0,高鐵桃園站,25.0,五號坡,07:50:00
59,5081,2023-09-02 08:16:26,0,1,24.0,領航南文德路口,38.0,新明國小(明德路),07:50:00
60,5081,2023-09-02 08:21:38,0,1,28.0,農場,38.0,新明國小(明德路),07:50:00
61,5081,2023-09-02 08:32:07,0,1,37.0,柑園,40.0,中央新生路口,07:50:00


In [88]:
# getontime_col = 'GETON_DATE'
# direction_col = 'DIRECTION'

# # 轉換時間欄位格式
# tickets_select_month["GETON_DATE"] = pd.to_datetime(tickets_select_month["GETON_DATE"])
# shift_select["Shift"] = pd.to_datetime(shift_select["Shift"], format="%H:%M:%S").dt.time

# # 定義函數來匹配班次
# def match_shift(row, shifts):
#     geton_time = row["GETON_DATE"].time()
#     shifts = sorted(shifts)
#     for i in range(len(shifts)):
#         if geton_time < shifts[i]:  # 比最早的 Shift 早
#             return shifts[max(0, i - 1)]  # 返回上一個班次（或第一個班次）
#     return shifts[-1]  # 晚於所有班次，返回最後一個班次

# # 開始比對
# matched_shifts = []
# for _, ticket in tickets_select_month.iterrows():
#     # 找到對應的班次
#     route_shifts = shift_select[
#         (shift_select["RouteName"] == ticket["RouteName"]) &
#         (shift_select["IsWorkday"] == ticket["IsWorkday"]) &
#         (shift_select["Direction"] == ticket["Direction"])
#     ]["Shift"].tolist()

#     # 如果有班次，匹配
#     if route_shifts:
#         matched_shift = match_shift(ticket, route_shifts)
#         matched_shifts.append(matched_shift)
#     else:
#         matched_shifts.append(None)  # 如果沒有匹配的班次

# # 新增匹配結果欄位
# tickets_select_month["Matched_Shift"] = matched_shifts

# # 輸出結果
# print(tickets_select_month)


In [66]:
tickets_select_month[['RouteName','GETON_DATE','IsWorkday','Direction','GETON_STOP_SEQ','GETON_STOP_NAME', 'GETOFF_STOP_SEQ','GETOFF_STOP_NAME','Matched_Shift']]

Unnamed: 0,RouteName,GETON_DATE,IsWorkday,Direction,GETON_STOP_SEQ,GETON_STOP_NAME,GETOFF_STOP_SEQ,GETOFF_STOP_NAME,Matched_Shift
0,206A,2023-09-01 07:57:00,1,0,1.0,桃園總站,23.0,八股坡,08:00:00
1,206A,2023-09-01 08:07:53,1,0,3.0,桃花園飯店,8.0,九崁店,08:00:00
2,206A,2023-09-01 08:08:04,1,0,3.0,桃花園飯店,14.0,永安北路,08:00:00
3,206A,2023-09-01 08:08:07,1,0,3.0,桃花園飯店,34.0,洪厝,08:00:00
4,206A,2023-09-01 08:08:09,1,0,3.0,桃花園飯店,10.0,力行路口,08:00:00
...,...,...,...,...,...,...,...,...,...
558,206A,2023-09-30 12:03:56,0,0,3.0,桃花園飯店,11.0,蓮華寺,12:00:00
559,206A,2023-09-30 12:07:05,0,0,4.0,桃園郵局,27.0,蘆竹親子館(大竹),12:00:00
560,206A,2023-09-30 12:07:09,0,0,4.0,桃園郵局,29.0,廟前,12:00:00
561,206A,2023-09-30 12:07:14,0,0,4.0,桃園郵局,38.0,高鐵桃園站,12:00:00


In [51]:
tickets_select_month[tickets_select_month['DIRECTION'] == 0][['RouteName','GETON_DATE','IsWorkday','DIRECTION','GETON_STOP_SEQ','GETON_STOP_NAME', 'GETOFF_STOP_SEQ','GETOFF_STOP_NAME']]

Unnamed: 0,RouteName,GETON_DATE,IsWorkday,DIRECTION,GETON_STOP_SEQ,GETON_STOP_NAME,GETOFF_STOP_SEQ,GETOFF_STOP_NAME
17,5081,2023-09-01 10:28:59.000,1,0,1.0,中壢總站,13.0,大丘田
18,5081,2023-09-01 10:29:01.000,1,0,1.0,中壢總站,10.0,三座屋
19,5081,2023-09-01 10:29:09.000,1,0,1.0,中壢總站,10.0,三座屋
20,5081,2023-09-01 10:29:13.000,1,0,1.0,中壢總站,36.0,淨蓮寺
21,5081,2023-09-01 10:29:18.000,1,0,1.0,中壢總站,40.0,大園車站
...,...,...,...,...,...,...,...,...
1402,5081,2023-09-30 10:36:33.000,0,0,4.0,新明國小(明德路),28.0,下洽溪
1403,5081,2023-09-30 12:57:05.000,0,0,20.0,高鐵桃園站,33.0,頂莊
1404,5081,2023-09-30 12:57:07.000,0,0,20.0,高鐵桃園站,33.0,頂莊
1411,5081,2023-09-30 18:41:10.000,0,0,1.0,中壢總站,4.0,新明國小(明德路)


In [34]:
tickets_select_month[['RouteName','GETON_DATE','IsWorkday','DIRECTION','GETON_STOP_SEQ', 'GETOFF_STOP_SEQ']]

Unnamed: 0,RouteName,GETON_DATE,IsWorkday,DIRECTION,GETON_STOP_SEQ,GETOFF_STOP_SEQ
0,206A,2023-09-01 07:57:00.000,1,0,1.0,23.0
1,206A,2023-09-01 08:07:53.000,1,0,3.0,8.0
2,206A,2023-09-01 08:08:04.000,1,0,3.0,14.0
3,206A,2023-09-01 08:08:07.000,1,0,3.0,34.0
4,206A,2023-09-01 08:08:09.000,1,0,3.0,10.0
...,...,...,...,...,...,...
595,206A,2023-09-30 12:03:56.000,0,0,3.0,11.0
596,206A,2023-09-30 12:07:05.000,0,0,4.0,27.0
597,206A,2023-09-30 12:07:09.000,0,0,4.0,29.0
598,206A,2023-09-30 12:07:14.000,0,0,4.0,38.0


In [29]:
tickets_select_month.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 34 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   OPERATOR_NAME         600 non-null    object 
 1   ROUTE_TYPE            600 non-null    object 
 2   OPERATOR_ID           600 non-null    int64  
 3   CAR_NUMBER            600 non-null    object 
 4   ROUTE_ID              600 non-null    object 
 5   RouteName             600 non-null    object 
 6   CARD_OPERATOR         600 non-null    object 
 7   CARDID_DEC            600 non-null    object 
 8   CARDID_HEX            600 non-null    object 
 9   CITIZENCARD           600 non-null    int64  
 10  CARD_TYPE             600 non-null    object 
 11  CARD_TYPE_GROUP       600 non-null    object 
 12  DIRECTION             600 non-null    int64  
 13  GETON_DATE            600 non-null    object 
 14  GETON_STOP_SEQ        600 non-null    float64
 15  GETON_STOP_NAME       6

In [None]:
tickets

In [None]:
def find_bus_shift(swipe_time, is_weekend, schedule):
    shifts = schedule['Shift'].tolist()
    if swipe_time < shifts[0]:
        return shifts[0]
    for i in range(1, len(shifts)):
        if shifts[i-1] <= swipe_time < shifts[i]:
            return shifts[i-1]
    return shifts[-1]

# 刷卡找到對應班次
results = []
for i, row in df.iterrows():
    swipe_time = row['上車時間']
    is_weekend = row['平日假日'] == '假日'
    filtered_schedule = sch[sch['WeekendorNot'] == ('假日' if is_weekend else '平日')]
    shift = find_bus_shift(swipe_time, is_weekend, filtered_schedule)
    results.append(shift)

df['Shift'] = results

Unnamed: 0,RouteName,IsWorkday,Direction,Shift,IsCycle
0,206A,0,0,08:00:00,0
1,206A,0,0,12:00:00,0
2,206A,0,0,13:00:00,0
3,206A,1,0,08:00:00,0
4,206A,1,0,12:00:00,0
5,206A,1,0,13:00:00,0
