In [1]:
import pandas as pd
import csv

In [12]:
# Path of the CSV file that the cells below parse. The commented-out line is an
# alternative input file; enable it instead to process that file.
csv_path = 'y39_rebar.csv'
# csv_path = 'cq881_rebar.csv'

# Custom parser: the last expected column may itself contain commas.
def custom_csv_parser(file_path, encoding='Big5', n_columns=8):
    """Read a CSV file whose last expected column may contain unquoted commas.

    Every record with more than ``n_columns`` fields has its surplus fields
    merged back into field ``n_columns`` (joined with ``', '``), so the record
    ends up with exactly ``n_columns`` fields. Records with ``n_columns`` fields
    or fewer keep their length. In every record, trailing commas and spaces are
    stripped from the last field. Blank lines are skipped.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Text encoding used to open the file.
        n_columns: Number of fields a well-formed record has (must be >= 1).

    Returns:
        A list of records, each a list of strings, in file order. The first
        line of the file is returned like any other record.
    """
    rows = []
    # newline='' is what the csv module requires so that it can do its own
    # line-ending handling (e.g. for newlines inside quoted fields).
    with open(file_path, encoding=encoding, newline='') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for a blank line; row[-1] would fail on it.
                continue
            if len(row) > n_columns:
                # Fold the overflow back into the last expected field.
                row[n_columns - 1:] = [', '.join(row[n_columns - 1:])]
            # rstrip(', ') removes any run of trailing ',' and ' ' characters.
            row[-1] = row[-1].rstrip(', ')
            rows.append(row)
    return rows


# Read the file through the custom parser.
rows = custom_csv_parser(csv_path)

# Column names applied to the parsed records.
columns = [
    "FileName",
    "EntityName",
    "ObjectType",
    "RotationAngle",
    "CentreCoor",
    "Height",
    "Width",
    "Text",
]

# Build the DataFrame from every record after the first one
# (the first record is presumably the file's own header line -- confirm).
df = pd.DataFrame(data=rows[1:], columns=columns)

In [None]:
# Print the 'Text' value of every row, in row order.
# Iterating the column directly avoids rebinding the notebook-level names
# `rows` and `columns` (the parsed records and the header list) to integers.
for text in df['Text']:
    print(text)

In [13]:
import re
import pandas as pd
import numpy as np

# Group the rows by their 'FileName' value; the loop further down iterates
# over these groups one at a time.
grouped = df.groupby('FileName')

def parse_coordinate(coord_str: str) -> list:
    """Parse a ``"(x y z)"`` coordinate string into ``[x, y, z]`` floats.

    The string must start with an opening parenthesis followed by three
    whitespace-separated numbers and a closing parenthesis. Each number may
    carry a sign, a decimal point and an exponent (``-12.5``, ``+3``, ``1e3``).
    Anything after the closing parenthesis is ignored.

    Args:
        coord_str: Coordinate text such as ``"(100.0 -25.5 0.0)"``.

    Returns:
        A list of three floats ``[x, y, z]``.

    Raises:
        ValueError: If ``coord_str`` does not start with a well-formed triple.
    """
    # One signed decimal number, optionally in exponent notation.
    number = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
    match = re.match(rf'\(({number})\s+({number})\s+({number})\)', coord_str)
    if match is None:
        raise ValueError(f"無法解析坐標: {coord_str}")
    return [float(part) for part in match.groups()]
    
def find_nearest(candidate_columns: str, x: float, y: float, df: pd.DataFrame) -> int:
    """Return the position in ``df`` of the candidate row closest to ``(x, y)``.

    A row is a candidate when its value in ``candidate_columns`` is not null.
    Distance is the planar Euclidean distance between ``(x, y)`` and the first
    two components of the row's parsed ``CentreCoor``; the third component is
    ignored. On ties the earliest candidate wins (``np.argmin`` behaviour).

    Args:
        candidate_columns: Name of the column whose non-null rows are candidates.
        x: Target x coordinate.
        y: Target y coordinate.
        df: Frame holding ``candidate_columns`` and a ``CentreCoor`` column of
            strings accepted by ``parse_coordinate``.

    Returns:
        Zero-based row position, valid for ``df.iloc[position]`` even when
        ``df`` also contains non-candidate rows.

    Raises:
        ValueError: If ``df`` has no candidate row, or a candidate's
            ``CentreCoor`` cannot be parsed.
    """
    # Remember where each candidate sits inside `df`, so the result can be
    # mapped back after filtering.
    is_candidate = df[candidate_columns].notna().to_numpy()
    positions = np.flatnonzero(is_candidate)
    if positions.size == 0:
        raise ValueError(f"沒有可用的候選列: {candidate_columns}")

    # Parse the coordinates of the candidate rows only.
    candidate_coords = np.array(
        [parse_coordinate(coord) for coord in df.loc[is_candidate, 'CentreCoor']],
        dtype=float,
    )

    # Planar distance from every candidate to the target point.
    offsets = candidate_coords[:, :2] - np.array([x, y], dtype=float)
    distances = np.sqrt(np.sum(offsets ** 2, axis=1))

    # Translate the winner's rank among candidates back to a position in `df`.
    return int(positions[np.argmin(distances)])

def _nearest_row(column: str, x: float, y: float, candidates: pd.DataFrame):
    """Return the row of ``candidates`` closest to ``(x, y)``, or ``None`` if it is empty.

    ``candidates`` must already be limited to rows where ``column`` is not
    null, so that the position returned by ``find_nearest`` indexes it directly.
    """
    if candidates.empty:
        return None
    return candidates.iloc[find_nearest(column, x, y, candidates)]


# Process every group (one group per FileName value).
for name, group in grouped:
    print(f"組名: {name}")

    # Work on a private copy so the helper columns added below are never
    # written into a slice of the source frame (avoids SettingWithCopyWarning).
    group = group.copy()
    text: pd.Series = group['Text']

    # Type label of this group; stays 'Unknown' when no type text is found.
    type_name: str = 'Unknown'
    # Values collected for this group. 'Diameter' is never assigned in this
    # loop, so it keeps its initial 0.
    type_data: dict = {'Depth':0,
                       'Diameter':0,
                       'Thickness':0,
                       'Tiebeam_width':0,
                       'Tiebeam_length':0,
                       'Main_rebar':{},
                       'Stirrup_rebar':{},
                       'Concrete_strength':0}

    # --- 0. Pre-extract candidate values from the text labels ---
    # Elevation: text starting with "EL", whitespace, then a number with decimals.
    EL_pattern:str = r'^EL\s+(\d+\.\d+)'
    group['Extracted_EL'] = text.str.extract(EL_pattern, expand=False).astype(float)
    # Both are NaN when the group has no matching elevation text.
    EL_min: float = group['Extracted_EL'].dropna().min()
    EL_max: float = group['Extracted_EL'].dropna().max()

    # Stirrup rebar: text that is exactly "D<diameter>@<spacing>".
    stirrup_rebar_pattern:str = r'^D(\d+)@(\d+)$'
    stirrup_parts: pd.DataFrame = text.str.extract(stirrup_rebar_pattern)
    group['Extracted_Rebar_diameter'] = pd.to_numeric(stirrup_parts[0], errors='coerce').astype('Int64')
    group['Extracted_Rebar_spacing'] = pd.to_numeric(stirrup_parts[1], errors='coerce').astype('Int64')

    # Bare integers: text consisting only of digits.
    int_pattern:str = r'^(\d+)$'
    group['Extracted_Int'] = text.str.extract(int_pattern, expand=False).astype(float)

    # Concrete strength: digits, whitespace, then the literal "kgf/cm\U+00B2" at the end.
    concrete_strength_pattern:str = r'(\d+)\s+kgf/cm\\U\+00B2$'
    group['Extracted_Concrete_strength'] = text.str.extract(concrete_strength_pattern, expand=False).astype(float)

    # Main rebar: text that is exactly "<count>-D<diameter>".
    main_rebar_pattern:str = r'^(\d+)-D(\d+)$'
    main_parts: pd.DataFrame = text.str.extract(main_rebar_pattern)
    group['Extracted_Main_rebar_count'] = pd.to_numeric(main_parts[0], errors='coerce').astype('Int64')
    group['Extracted_Main_rebar_diameter'] = pd.to_numeric(main_parts[1], errors='coerce').astype('Int64')

    # Rows whose text is a bare integer; shared by the thickness and tie-beam steps.
    int_rows:pd.DataFrame = group.dropna(subset=['Extracted_Int'])

    # --- 1. Type name ---
    type_pattern:str = r'BORED\s+PILE\s+TYPE\s+(\S+)'
    group['Type_name'] = text.str.extract(type_pattern, expand=False)
    found_types = group['Type_name'].dropna().unique()
    if len(found_types) > 0:
        # When several type labels exist, the first one encountered is used.
        type_name = found_types[0]
    print(f"型號: {type_name}")

    # --- 2. Depth: span between the highest and lowest extracted elevation ---
    Depth = EL_max - EL_min
    type_data['Depth'] = Depth
    print(f"深度: {Depth}")

    # --- 3. Thickness: bare integer nearest to the '場鑄擋土排樁' label ---
    thickness_row:pd.DataFrame = group[group['Text'] == '場鑄擋土排樁']
    if len(thickness_row) == 0:
        print("未找到場鑄擋土排樁")
    else:
        # When several rows match, the first one is used as the anchor.
        x, y, z = parse_coordinate(thickness_row['CentreCoor'].values[0])
        nearest = _nearest_row('Extracted_Int', x, y, int_rows)
        if nearest is None:
            print("無法找到厚度")
        else:
            thickness:float = nearest['Extracted_Int']
            type_data['Thickness'] = thickness
            print(f"厚度: {thickness}")

    # --- 4. Concrete strength: first extracted value in the group ---
    concrete_strength_rows:pd.DataFrame = group.dropna(subset=['Extracted_Concrete_strength'])
    if len(concrete_strength_rows) > 0:
        concrete_strength:float = concrete_strength_rows['Extracted_Concrete_strength'].values[0]
    else:
        concrete_strength = 0
        print("無法找到混凝土強度")
    type_data['Concrete_strength'] = concrete_strength
    print(f"混凝土強度: {concrete_strength}")

    # --- 5. Main rebar & stirrup: entries nearest to the single '剖 面' label ---
    section_row:pd.DataFrame = group[group['Text'] == '剖 面']
    if len(section_row) > 1:
        print("找到多個剖面")
    elif len(section_row) == 0:
        print("未找到剖面")
    else:
        x, y, z = parse_coordinate(section_row['CentreCoor'].values[0])

        # Main rebar entry closest to the label.
        main_rebar_rows:pd.DataFrame = group.dropna(subset=['Extracted_Main_rebar_count'])
        nearest = _nearest_row('Extracted_Main_rebar_count', x, y, main_rebar_rows)
        if nearest is None:
            print("無法找到主筋")
        else:
            main_rebar_count:int = nearest['Extracted_Main_rebar_count']
            main_rebar_diameter:int = nearest['Extracted_Main_rebar_diameter']
            type_data['Main_rebar'] = {'Count': main_rebar_count, 'Diameter': main_rebar_diameter}
            print(f"主筋: {main_rebar_count} - D{main_rebar_diameter}")

        # Stirrup entry closest to the label.
        stirrup_rebar_rows:pd.DataFrame = group.dropna(subset=['Extracted_Rebar_diameter'])
        nearest = _nearest_row('Extracted_Rebar_diameter', x, y, stirrup_rebar_rows)
        if nearest is None:
            print("無法找到箍筋")
        else:
            stirrup_rebar_diameter:int = nearest['Extracted_Rebar_diameter']
            stirrup_rebar_spacing:int = nearest['Extracted_Rebar_spacing']
            type_data['Stirrup_rebar'] = {'Diameter': stirrup_rebar_diameter, 'Spacing': stirrup_rebar_spacing}
            print(f"箍筋: D{stirrup_rebar_diameter} @ {stirrup_rebar_spacing}")

    # --- 6. Tie-beam width & length: integers nearest to the single '擋土排樁繫梁詳圖' label ---
    beam_row:pd.DataFrame = group[group['Text'] == '擋土排樁繫梁詳圖']
    if len(beam_row) > 1:
        print("找到多個擋土排樁繫梁詳圖")
    elif len(beam_row) == 0:
        print("未找到擋土排樁繫梁詳圖")
    else:
        x, y, z = parse_coordinate(beam_row['CentreCoor'].values[0])

        # Width: nearest bare integer, whatever its rotation.
        nearest = _nearest_row('Extracted_Int', x, y, int_rows)
        if nearest is None:
            print("無法找到梁寬")
        else:
            beam_width:float = nearest['Extracted_Int']
            type_data['Tiebeam_width'] = beam_width
            print(f"梁寬: {beam_width}")

        # Length: nearest bare integer whose rotation angle is not zero.
        # The angle is compared numerically so '0', '0.0' and '0.00' all count
        # as unrotated; values that cannot be parsed are kept as candidates.
        rotation = pd.to_numeric(int_rows['RotationAngle'], errors='coerce')
        int_rotated_rows:pd.DataFrame = int_rows[rotation != 0]
        nearest = _nearest_row('Extracted_Int', x, y, int_rotated_rows)
        if nearest is None:
            print("無法找到梁長")
        else:
            beam_length:float = nearest['Extracted_Int']
            type_data['Tiebeam_length'] = beam_length
            print(f"梁長: {beam_length}")

    print(type_data)



組名: F762SE1850.dwg
型號: S4
深度: 37.0
厚度: 1000.0
混凝土強度: 350.0
主筋: 22 - D36
箍筋: D19 @ 100
梁寬: 1000.0
梁長: 1000.0
{'Depth': 37.0, 'Diameter': 0, 'Thickness': 1000.0, 'Tiebeam_width': 1000.0, 'Tiebeam_length': 1000.0, 'Main_rebar': {'Count': 22, 'Diameter': 36}, 'Stirrup_rebar': {'Diameter': 19, 'Spacing': 100}, 'Concrete_strength': 350.0}
組名: F762SE1860.dwg
型號: S4-1
深度: 37.0
厚度: 1000.0
混凝土強度: 350.0
主筋: 22 - D36
箍筋: D19 @ 150
梁寬: 1000.0
梁長: 1000.0
{'Depth': 37.0, 'Diameter': 0, 'Thickness': 1000.0, 'Tiebeam_width': 1000.0, 'Tiebeam_length': 1000.0, 'Main_rebar': {'Count': 22, 'Diameter': 36}, 'Stirrup_rebar': {'Diameter': 19, 'Spacing': 150}, 'Concrete_strength': 350.0}
組名: F762SE1870.dwg
型號: S4-2
深度: 37.0
厚度: 1000.0
混凝土強度: 350.0
主筋: 10 - D36
箍筋: D16 @ 150
梁寬: 1000.0
梁長: 1000.0
{'Depth': 37.0, 'Diameter': 0, 'Thickness': 1000.0, 'Tiebeam_width': 1000.0, 'Tiebeam_length': 1000.0, 'Main_rebar': {'Count': 10, 'Diameter': 36}, 'Stirrup_rebar': {'Diameter': 16, 'Spacing': 150}, 'Concrete_strength'

In [6]:
# Rebuild the per-FileName grouping and show its keys (the distinct FileName values).
grouped = df.groupby('FileName')
print(grouped.groups.keys())

dict_keys(['F762SE1880.dwg'])
