# In[53]:
import os
import geopandas as gpd
import pandas as pd 
from shapely.geometry import Point, LineString

# In[54]:
# 00_setup: OS / file-handling helper functions
def create_folder(folder_name):
    """Create a folder (including missing parents) and return its absolute path.

    Args:
        folder_name (str): Path of the folder to create; may be relative.

    Returns:
        str: Absolute path of ``folder_name``.

    Raises:
        FileExistsError: If ``folder_name`` already exists but is not a
            directory.
    """
    # exist_ok=True removes the window between a separate os.path.exists()
    # check and the creation, so concurrent callers cannot make this fail.
    os.makedirs(folder_name, exist_ok=True)
    return os.path.abspath(folder_name)

def findfiles(filefolderpath, filetype='.csv', recursive=True):
    """
    Collect the paths of files under a folder whose names end with a suffix.

    Args:
        filefolderpath (str): Folder to search.
        filetype (str, optional): Suffix a file name must end with.
            Defaults to '.csv'.
        recursive (bool, optional): When True (default) every sub-folder is
            searched as well; when False only the files directly inside
            ``filefolderpath`` are considered.

    Returns:
        list: Paths of all matching files.
    """
    if not recursive:
        # Current folder only: keep regular files carrying the wanted suffix.
        candidates = (
            (os.path.join(filefolderpath, entry), entry)
            for entry in os.listdir(filefolderpath)
        )
        return [
            path
            for path, entry in candidates
            if os.path.isfile(path) and entry.endswith(filetype)
        ]

    # Walk the folder together with all of its sub-folders.
    return [
        os.path.join(parent, entry)
        for parent, _, entries in os.walk(filefolderpath)
        for entry in entries
        if entry.endswith(filetype)
    ]

def read_combined_dataframe(file_list, filepath = True):
    """Read several tabular files and stack them into a single DataFrame.

    The reader is chosen from the file extension, matched case-insensitively:
    ``.csv`` -> ``pd.read_csv``, ``.shp`` -> ``gpd.read_file``,
    ``.xls`` / ``.xlsx`` -> ``pd.read_excel``. Files with any other extension
    are reported and skipped, and so are files that fail to load.

    Args:
        file_list (list[str]): Paths of the files to read.
        filepath (bool, optional): When True (default) a ``FilePath`` column
            holding the source path is added to every frame.

    Returns:
        pandas.DataFrame: All successfully read frames concatenated under a
        fresh integer index.

    Raises:
        ValueError: If none of the files could be read.
    """
    dataframes = []

    for file in file_list:
        # Compare on a lower-cased copy so '.CSV' / '.SHP' are accepted too,
        # while the original path is what gets opened and recorded.
        lowered = file.lower() if isinstance(file, str) else file
        try:
            if lowered.endswith('.csv'):
                df = pd.read_csv(file)
            elif lowered.endswith('.shp'):
                df = gpd.read_file(file)
            elif lowered.endswith(('.xls', '.xlsx')):
                df = pd.read_excel(file)
            else:
                print(f"Unsupported file format: {file}")
                continue
            if filepath:
                df['FilePath'] = file  # record which file each row came from
            dataframes.append(df)
        except Exception as e:
            # Best effort by design: one unreadable file must not abort the batch.
            print(f"Error reading {file}: {e}")

    if not dataframes:
        # pd.concat([]) raises an opaque "No objects to concatenate"; keep the
        # exception type but say what actually went wrong.
        raise ValueError("No readable files found in file_list; nothing to combine")

    # Merge every frame into one
    return pd.concat(dataframes, ignore_index=True)

# 01_geodataframe: geometry-building helpers

def dataframe_to_point(df, lon_col, lat_col, crs="EPSG:4326", target_crs="EPSG:3826"):
    '''
    Build a point GeoDataFrame from two coordinate columns and reproject it.

    Parameters:
    df (dataframe) : DataFrame holding the coordinate columns.
    lon_col (str) : Name of the longitude (x) column.
    lat_col (str) : Name of the latitude (y) column.
    crs (str) : CRS the input coordinates are expressed in; commonly
        EPSG:4326 (WGS84) or EPSG:3826 (TWD97).
    target_crs : CRS of the returned GeoDataFrame. Anything accepted by
        GeoDataFrame.to_crs works (e.g. "EPSG:3826"), not only strings of
        the form "EPSG:<code>".

    Returns:
    GeoDataFrame : the columns of df plus a point geometry, in target_crs.
    '''
    # Vectorised point construction from the x (longitude) / y (latitude) columns.
    geometry = gpd.points_from_xy(df[lon_col], df[lat_col])
    # Tag the points with the CRS they were recorded in ...
    gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)
    # ... then reproject; passing target_crs through avoids parsing the string.
    return gdf.to_crs(target_crs)

def get_line(df, x1 = 'Lon_o', x2 = 'Lon_d', y1 = 'Lat_o', y2 = 'Lat_d', crs="EPSG:4326"):
    '''
    Build a GeoDataFrame of straight origin-destination lines, one per row.

    Parameters:
    df (dataframe) : DataFrame holding the coordinate columns. It is not modified.
    x1 (str) : Origin longitude column.
    y1 (str) : Origin latitude column.
    x2 (str) : Destination longitude column.
    y2 (str) : Destination latitude column.
    crs : CRS the coordinates are expressed in. Defaults to EPSG:4326 (WGS84),
        which is what this helper has always assumed.

    Returns:
    GeoDataFrame : a copy of df with a 'geometry' column of two-point
        LineStrings, tagged with crs.
    '''
    # Iterating the four columns in lockstep avoids a row-wise apply, and it
    # also yields a well-formed (empty) result when df has no rows.
    lines = [
        LineString([(ox, oy), (dx, dy)])
        for ox, oy, dx, dy in zip(df[x1], df[y1], df[x2], df[y2])
    ]
    # Work on a copy so the caller's DataFrame does not gain a 'geometry' column.
    return gpd.GeoDataFrame(df.copy(), geometry=lines, crs=crs)



# In[55]:
# 00_Setup: module-level settings
# 1.) Parameters
SL_buffer_meters = 500  # screenline buffer distance (metres, per the name)

# 2.) Input data folders, resolved against the current working directory
referencefolder = os.path.abspath(os.path.join('..', '參考資料'))
seqfolder = os.path.abspath(os.path.join('..', '00_TDX資料下載'))
SLfolder = os.path.abspath(os.path.join('..', '..', 'TRTS5屏柵線'))


# In[68]:
def _add_category_and_number(gdf):
    """Add 'Category' and 'Number' columns derived from the 'TRTS5' code.

    'Category' is 0 when the code contains 'CD', 1 when it contains 'SL',
    otherwise missing. 'Number' is the first run of digits in the code, as int.
    The frame is modified in place and returned for convenience.
    """
    gdf['Category'] = gdf['TRTS5'].apply(
        lambda x: 0 if 'CD' in x else 1 if 'SL' in x else None
    )
    # expand=False returns a Series, so a plain column assignment is enough.
    gdf['Number'] = gdf['TRTS5'].str.extract(r'(\d+)', expand=False).astype(int)
    return gdf


def get_screenline_and_surveypoint(SLfolder):
    """Load the screenline and survey-point shapefiles from a folder.

    Both layers get 'Category' and 'Number' columns derived from their
    'TRTS5' code and are returned sorted by them.

    Args:
        SLfolder (str): Folder containing 'TRTS5_屏柵線線型.shp' and
            'TRTS5_全部調查點位.shp'.

    Returns:
        tuple: ``(gdf_sl, gdf_sp)``. ``gdf_sl`` holds the screenlines sorted
        by Category and Number. ``gdf_sp`` holds the survey points with
        X / Y renamed to PositionLon / PositionLat and 'Name' split on '-'
        into 'TRTS5' and an integer 'SP_No', sorted by Category, Number
        and SP_No.
    """
    # Screenlines
    gdf_sl = gpd.read_file(os.path.join(SLfolder, 'TRTS5_屏柵線線型.shp'))
    gdf_sl = _add_category_and_number(gdf_sl)
    gdf_sl = gdf_sl.sort_values(['Category', 'Number'])

    # Survey points: 'Name' is '<TRTS5 code>-<point number>'
    gdf_sp = gpd.read_file(os.path.join(SLfolder, 'TRTS5_全部調查點位.shp'))
    gdf_sp = gdf_sp.rename(columns = {'X':'PositionLon', 'Y':'PositionLat'})
    gdf_sp[['TRTS5', 'SP_No']] = gdf_sp['Name'].str.split('-', expand=True)
    gdf_sp['SP_No'] = gdf_sp['SP_No'].astype('int64')
    gdf_sp = _add_category_and_number(gdf_sp)
    gdf_sp = gdf_sp.sort_values(['Category', 'Number', 'SP_No'])

    return gdf_sl, gdf_sp

def get_gdf_sp_unique(gdf_sp):
    '''Collapse survey points that share a name into one averaged point.

    The same survey point name appears once per survey source, so rows are
    grouped by (Category, TRTS5, SP_No): the coordinates are averaged and the
    first Name / Surveyname of each group is kept. The result is turned into
    a point GeoDataFrame, treating the coordinates as EPSG:4326 and
    reprojecting to EPSG:3826.
    '''
    group_keys = ['Category', 'TRTS5', 'SP_No']
    aggregations = {
        'Name': 'first',
        'PositionLon': 'mean',
        'PositionLat': 'mean',
        'Surveyname': 'first',
    }

    ordered = gdf_sp.sort_values(group_keys)
    df_sp_unique = ordered.groupby(group_keys).agg(aggregations).reset_index()

    return dataframe_to_point(
        df=df_sp_unique,
        lon_col='PositionLon',
        lat_col='PositionLat',
        crs="EPSG:4326",
        target_crs="EPSG:3826",
    )

def get_df_seq(seqfolder):
    '''Read the bus stop-sequence CSV files of a folder into one DataFrame.

    Only the route / stop identification and coordinate columns are kept, and
    rows duplicated on everything except StopUID are dropped. Rows without a
    StopSequence are removed when they make up less than 5% of the data;
    otherwise they are kept and a warning with their share is printed so they
    can be inspected.

    seqfolder (str): folder holding the stop-sequence CSV files
        (searched recursively).

    Returns a DataFrame with columns RouteUID, SubRouteUID, Direction,
    StopUID, StopSequence, PositionLon, PositionLat.
    '''
    df_seq = read_combined_dataframe(file_list=findfiles(seqfolder))
    df_seq = df_seq.reindex(columns = ['RouteUID', 'SubRouteUID','Direction','StopUID', 'StopSequence', 'PositionLon', 'PositionLat'])
    df_seq = df_seq.drop_duplicates(subset= ['RouteUID', 'SubRouteUID','Direction', 'StopSequence', 'PositionLon', 'PositionLat'])

    total = len(df_seq)
    if total == 0:
        # Nothing to check, and the ratio below would divide by zero.
        return df_seq

    missing = df_seq['StopSequence'].isna()
    missing_ratio = missing.sum() / total
    if missing_ratio < 0.05:
        df_seq = df_seq[~missing]  # some stops come without a sequence number
    else:
        # The share is computed from the row count; dividing the filtered
        # DataFrame itself by len() (as before) does not yield a percentage.
        percentage = missing_ratio * 100
        print(f"===== 需要檢查沒有站序的站點有哪些，共有{percentage:.2f}%的資料沒有站序 =====")

    return df_seq

def get_gdfroutepair(df_seq):
    """Pair every stop with the next stop of the same route as a line.

    Stops are ordered by StopSequence within each
    (RouteUID, SubRouteUID, Direction) group; each stop becomes the 'From'
    end and the following stop of the group the 'To' end. The last stop of a
    group has no successor and is dropped. The pairs are returned through
    get_line as a GeoDataFrame of From -> To lines.
    """
    group_keys = ['RouteUID', 'SubRouteUID', 'Direction']
    # source column -> suffix used in the From*/To* output columns
    stop_fields = {
        'StopUID': 'Stop',
        'StopSequence': 'Seq',
        'PositionLon': 'Lon',
        'PositionLat': 'Lat',
    }

    # Sort first so that "next row in the group" means "next stop".
    ordered = df_seq.sort_values(by=group_keys + ['StopSequence'])

    # One grouped shift gives the following stop's values for all fields.
    following = ordered.groupby(group_keys)[list(stop_fields)].shift(-1)

    pairs = ordered[group_keys].copy()
    for source, suffix in stop_fields.items():
        pairs['From' + suffix] = ordered[source]
    for source, suffix in stop_fields.items():
        pairs['To' + suffix] = following[source]

    # The final stop of each group has no successor.
    pairs = pairs.dropna(subset=['ToSeq']).reset_index(drop=True)

    return get_line(pairs, x1 = 'FromLon', x2 = 'ToLon', y1 = 'FromLat', y2 = 'ToLat')



# In[69]:
# Load the screenlines and survey points supplied by the model-editing team
gdf_sl, gdf_sp = get_screenline_and_surveypoint(SLfolder=SLfolder)

# Survey points are recorded per road segment, so collapse them to one location each
gdf_sp_unique = get_gdf_sp_unique(gdf_sp).to_crs(epsg=3826)

# Work in TWD97 (EPSG:3826), keeping only the columns used afterwards
gdf_sl_twd97 = gdf_sl.to_crs(epsg=3826).reindex(
    columns=['TRTS5', 'Category', 'Number', 'geometry']
)

# Buffer every screenline by SL_buffer_meters
gdf_sl_buffer = gdf_sl_twd97.assign(
    geometry=gdf_sl_twd97.geometry.buffer(SL_buffer_meters)
)

# Bus stop sequences, as a table and as points
df_seq = get_df_seq(seqfolder=seqfolder)
gdf_seq = dataframe_to_point(
    df=df_seq,
    lon_col='PositionLon',
    lat_col='PositionLat',
    crs="EPSG:4326",
    target_crs="EPSG:3826",
)

# Straight lines between consecutive stops of each bus route
gdf_routepair = get_gdfroutepair(df_seq)