In [2]:
import pandas as pd
import csv
import re
import numpy as np

In [11]:
# Relative path of the input CSV file that this cell parses
csv_path = 'y39_stru.csv'

# Custom parser for CSV files whose last field may contain unquoted commas
def custom_csv_parser(file_path, encoding='Big5HKSCS', num_columns=8):
    """Read a CSV file, folding any surplus fields back into the last column.

    A record with more than ``num_columns`` fields is assumed to have unquoted
    commas inside its last column: the surplus fields are re-joined with
    ``', '`` so every such record ends up with exactly ``num_columns`` fields.
    Trailing commas/spaces are then stripped from the last field of each record.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Text encoding used to decode the file.
        num_columns: Expected number of fields per record (default 8).

    Returns:
        A list of records (each a list of strings), including the first
        (header) row. Blank lines in the file are skipped.
    """
    last = num_columns - 1
    rows = []
    # newline='' lets the csv module do its own line-ending handling,
    # as the csv documentation requires for file objects.
    with open(file_path, encoding=encoding, newline='') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for a blank line; row[-1] would raise IndexError
                continue
            if len(row) > num_columns:
                # Fold the overflow fields back into the last column
                row[last:] = [', '.join(row[last:])]
            row[-1] = row[-1].rstrip(', ')
            rows.append(row)
    return rows

# Column names applied to the eight fields of every record
columns = [
    "FileName",
    "EntityName",
    "ObjectType",
    "RotationAngle",
    "CentreCoor",
    "Height",
    "Width",
    "Text",
]

# Read the file with the custom parser
rows = custom_csv_parser(csv_path)

# Build the DataFrame from every parsed row except the first one
df = pd.DataFrame(data=rows[1:], columns=columns)

In [12]:
# Print every value of the 'Text' column, one per line.
# Iterating the column directly avoids building a full row Series per
# iteration and no longer rebinds `rows` / `columns` (the parsed-row list and
# the column-name list from the loading cell) to integers.
for text in df['Text']:
    print(text)

\A1;---
\A1;1.00 TON/M{\H0.7x;\S2^;}
\A1;1.00 TON/M{\H0.7x;\S2^;}
\A1;1.00 TON/M{\H0.7x;\S2^;}
\A1;1.00 TON/M{\H0.7x;\S2^;}
\A1;0.50 TON/M{\H0.7x;\S2^;}
\A1;1.00 TON/M{\H0.7x;\S2^;}
\A1;0.50 TON/M{\H0.7x;\S2^;}
\A1;fc'  ≧  350  kgf/cm{\H0.7x;\S2^;}
\A1;fc'  ≧  280  kgf/cm{\H0.7x;\S2^;}
\A1;fc'  ≧  210  kgf/cm{\H0.7x;\S2^;}
\A1;fc'  ≧  450  kgf/cm{\H0.7x;\S2^;}
\A1;fc'  ≧  280  kgf/cm{\H0.7x;\S2^;}
\A1;fc'  ≧  140  kgf/cm{\H0.7x;\S2^;}
\A1;fc'  ≧  350  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  2800  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  4200  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  3500  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  2400  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  2500  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  2200  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  2400  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  2500  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  3500  kgf/cm{\H0.7x;\S2^;}
\A1;Fy  ≧  3300  kgf/cm{\H0.7x;\S2^;}
{\H1.3462x;\W1;\C2;40mm<鋼板厚度 t ≦ 100mm }\P40mm < STEEL  PLATE  t  ≦ 100 mm
\A1;Fy  ≧  3000  kgf/cm{\H0.7x;\S2^;}
{\H1.3462x;\W1;\C2;鋼板厚度 t ≦ 40

In [13]:
# One real number: optional sign, digits with optional fraction, optional exponent
_COORD_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
_COORD_RE = re.compile(
    rf'\(\s*({_COORD_NUMBER})\s+({_COORD_NUMBER})\s+({_COORD_NUMBER})\s*\)'
)


def parse_coordinate(coord_str):
    """Parse a coordinate string of the form ``"(x y z)"`` into three floats.

    The three numbers are whitespace-separated inside parentheses. Negative
    values and exponent notation (e.g. ``-1.5`` or ``3e2``) are accepted.

    Args:
        coord_str: Text that starts with a parenthesised coordinate triple.

    Returns:
        ``[x, y, z]`` as a list of floats.

    Raises:
        ValueError: If ``coord_str`` does not start with a valid triple.
    """
    match = _COORD_RE.match(coord_str)
    if match is None:
        raise ValueError(f"無法解析坐標: {coord_str}")
    return [float(value) for value in match.groups()]
    
def find_nearest(target_column: str, candidate_columns: str, df: pd.DataFrame):
    """Return the candidate value nearest to the target row, looking rightwards.

    The target is the first row whose ``target_column`` flag is True. Candidates
    are the rows whose ``candidate_columns`` value is not null. Only candidates
    whose x coordinate is strictly greater than the target's are considered;
    among those, the one with the smallest distance in the x/y plane wins.

    Args:
        target_column: Name of a boolean column marking the target row.
        candidate_columns: Name of the column holding the values to pick from.
        df: Frame containing both columns and a 'CentreCoor' column with
            "(x y z)" strings.

    Returns:
        The ``candidate_columns`` value of the nearest valid candidate, or 0
        (after printing a message) when there is no target row, no candidate,
        or no candidate to the right of the target.
    """
    not_found_message = f"找不到足夠的候選行: {target_column}, {candidate_columns}"

    # Rows flagged True; eq(True) treats missing flags (NaN/None) as False
    target_rows = df[df[target_column].eq(True)]
    # Rows that actually carry a candidate value
    candidate_rows = df[df[candidate_columns].notna()]

    if target_rows.empty or candidate_rows.empty:
        print(not_found_message)
        return 0

    target_coords = np.array(parse_coordinate(target_rows.iloc[0]['CentreCoor']))
    candidate_coords = np.array(
        [parse_coordinate(coord) for coord in candidate_rows['CentreCoor']]
    )

    # Horizontal offset and planar (x, y) distance of every candidate
    x_diff = candidate_coords[:, 0] - target_coords[0]
    distances = np.sqrt(
        np.sum((candidate_coords[:, :2] - target_coords[:2]) ** 2, axis=1)
    )

    # Keep only the candidates strictly to the right of the target
    is_right = x_diff > 0
    if not is_right.any():
        # np.argmin would raise ValueError on an empty selection
        print(not_found_message)
        return 0

    nearest_index = np.argmin(distances[is_right])
    nearest_candidate_row = candidate_rows[is_right].iloc[nearest_index]
    return nearest_candidate_row[candidate_columns]
    
def find_nearest_one_or_two(target_column: str, candidate_columns: str, df: pd.DataFrame,
                            y_tolerance: float = 0.5, tie_ratio: float = 0.95):
    """Return the nearest candidate value, or the two nearest when nearly tied.

    The target is the first row whose ``target_column`` flag is True. Candidates
    are the rows whose ``candidate_columns`` value is not null. A candidate is
    valid when it is not left of the target (``x_diff >= 0``) AND not more than
    ``y_tolerance`` below it (``y_diff >= -y_tolerance``). Previously the x
    filter was overwritten by the y filter and never took effect.

    Args:
        target_column: Name of a boolean column marking the target row.
        candidate_columns: Name of the column holding the values to pick from.
        df: Frame containing both columns and a 'CentreCoor' column with
            "(x y z)" strings.
        y_tolerance: How far below the target a candidate may lie.
        tie_ratio: The two nearest candidates are both returned when the
            shorter distance exceeds ``tie_ratio`` times the longer one.

    Returns:
        ``[value]`` or ``[value_nearest, value_second]``. ``[0, 0]`` (after
        printing a message) when there is no target row, fewer than two
        candidate rows, or no valid candidate.
    """
    not_found_message = f"找不到足夠的候選行: {target_column}, {candidate_columns}"

    # Rows flagged True; eq(True) treats missing flags (NaN/None) as False
    target_rows = df[df[target_column].eq(True)]
    # Rows that actually carry a candidate value
    candidate_rows = df[df[candidate_columns].notna()]

    # At least two candidate rows are required
    if target_rows.empty or len(candidate_rows) < 2:
        print(not_found_message)
        return [0, 0]

    target_coords = np.array(parse_coordinate(target_rows.iloc[0]['CentreCoor']))
    candidate_coords = np.array(
        [parse_coordinate(coord) for coord in candidate_rows['CentreCoor']]
    )

    # Per-axis offsets and planar (x, y) distance of every candidate
    x_diff = candidate_coords[:, 0] - target_coords[0]
    y_diff = candidate_coords[:, 1] - target_coords[1]
    distances = np.sqrt(
        np.sum((candidate_coords[:, :2] - target_coords[:2]) ** 2, axis=1)
    )

    # Apply both positional filters together
    is_valid = (x_diff >= 0) & (y_diff >= -y_tolerance)
    valid_distances = distances[is_valid]
    valid_candidate_rows = candidate_rows[is_valid]

    if valid_distances.size == 0:
        # Nothing survives the filters; indexing the empty ordering would raise
        print(not_found_message)
        return [0, 0]

    # Stable sort keeps the original row order for equal distances
    order = np.argsort(valid_distances, kind='stable')
    nearest_value = valid_candidate_rows.iloc[order[0]][candidate_columns]
    if len(order) < 2:
        return [nearest_value]

    shortest = valid_distances[order[0]]
    second = valid_distances[order[1]]
    # Return both when the longer distance scaled by tie_ratio is still
    # smaller than the shorter one, i.e. the two are nearly equidistant
    if shortest > second * tie_ratio:
        second_value = valid_candidate_rows.iloc[order[1]][candidate_columns]
        return [nearest_value, second_value]
    return [nearest_value]

# Regular expressions applied to the 'Text' column.
#
# Value patterns: each captures one run of digits.
# "Fy"/"fy", whitespace, optional "≧", whitespace, digits, whitespace, "kgf/cm"
fy_pattern = r'[Ff]y\s+≧?\s+(\d+)\s+kgf/cm'
# Text that starts with digits followed by whitespace and "mm"
mm_pattern = r'^(\d+)\s+mm'
# "fc" with an optional apostrophe after either letter, "≧", digits, "kgf/cm"
fc_pattern = r'f\'?c\'?\s+≧\s+(\d+)\s+kgf/cm'
# Earlier variant kept for reference; NOTE(review): "¡Ù" looks like "≧"
# decoded with the wrong encoding -- confirm before reusing.
# fc_pattern = r'fc\'\s+¡Ù\s+(\d+)\s+kgf/cm'
#
# Label patterns: used to flag the rows whose text contains these phrases
# (words separated by any amount of whitespace).
protection_pattern2 = r'DIAPHRAGM\s+WALLS\s+\(BOTH\s+FACES\)'
protection_pattern1 = r'CONCRETE\s+CAST\s+AGAINST\s+AND\s+PERMANENTLY\s+EXPOSED\s+TO\s+WATER,\s+SOIL,\s+BLINDING'
rebar_strength_pattern2 = r'REINFORCEMENT\s+OF\s+10\s+mm\s+OR\s+SMALLER\s+IN\s+DIAMETER\s+SHALL\s+CONFORM\s+TO\s+CNS\s+560'
rebar_strength_pattern1 = r'REINFORCEMENT\s+OF\s+13\s+mm\s+OR\s+LARGER\s+IN\s+DIAMETER\s+SHALL\s+CONFORM\s+TO\s+CNS\s+560'
# Anchored at both ends: the whole text must be exactly "DIAPHRAGM WALLS"
concrete_strength_pattern = r'^DIAPHRAGM\s+WALLS$'

In [14]:
# Values accumulated across files; 0 means "not found yet"
wall_strength1: float = 0
wall_strength2: float = 0
rebar_strength1: float = 0
rebar_strength2: float = 0
protection1: float = 0
protection2: float = 0


def _resolve_nearest(current, label, flag_column, value_column, unit, not_found_message, group):
    """Look up one value via find_nearest unless it has already been found.

    Args:
        current: Value found so far (0 means not found yet).
        label: Name printed in front of the result.
        flag_column: Boolean column marking the label row in ``group``.
        value_column: Column holding the candidate values.
        unit: Unit text printed after a newly found value.
        not_found_message: Text printed when ``group`` has no flagged row.
        group: Rows of one file, with the flag and value columns present.

    Returns:
        ``current`` if it is already non-zero, the result of ``find_nearest``
        if the group has a flagged row, otherwise ``current`` (0).
    """
    if current != 0:
        print(f'{label} 已存在')
        return current
    if group[flag_column].any():
        current = find_nearest(flag_column, value_column, group)
        print(f'{label}:', current, unit)
        return current
    print(not_found_message)
    return current


grouped = df.groupby('FileName')

for name, group in grouped:

    print(name)

    # Build the helper columns on a new frame instead of assigning onto the
    # groupby slice (which triggers SettingWithCopyWarning).
    # expand=False makes str.extract return a Series; na=False turns missing
    # text into False so the flag columns are always usable as boolean masks.
    text = group['Text']
    group = group.assign(
        fy_number=text.str.extract(fy_pattern, expand=False).astype(float),
        mm_number=text.str.extract(mm_pattern, expand=False).astype(float),
        fc_number=text.str.extract(fc_pattern, expand=False).astype(float),
        rebar_protection2=text.str.contains(protection_pattern2, regex=True, na=False),
        rebar_protection1=text.str.contains(protection_pattern1, regex=True, na=False),
        rebar_strength1=text.str.contains(rebar_strength_pattern1, regex=True, na=False),
        rebar_strength2=text.str.contains(rebar_strength_pattern2, regex=True, na=False),
        concrete_strength=text.str.contains(concrete_strength_pattern, regex=True, na=False),
    )

    # Protection1 / Protection2: nearest 'mm' value to each label
    protection1 = _resolve_nearest(
        protection1, 'Protection1', 'rebar_protection1', 'mm_number', 'mm',
        'Protection1: 無法在檔案中找到對應資料', group)
    protection2 = _resolve_nearest(
        protection2, 'Protection2', 'rebar_protection2', 'mm_number', 'mm',
        'Protection2: 無法在檔案中找到對應資料', group)

    # Rebar Strength1 / Rebar Strength2: nearest 'Fy' value to each label
    rebar_strength1 = _resolve_nearest(
        rebar_strength1, 'Rebar Strength1', 'rebar_strength1', 'fy_number', 'kgf/cm2',
        f'Rebar Strength1: 無法在 {name} 檔案中找到對應 pattern', group)
    rebar_strength2 = _resolve_nearest(
        rebar_strength2, 'Rebar Strength2', 'rebar_strength2', 'fy_number', 'kgf/cm2',
        f'Rebar Strength2: 無法在 {name} 檔案中找到對應 pattern', group)

    # Wall strength: the one or two 'fc' values nearest to the label
    if wall_strength1 == 0 and group['concrete_strength'].any():
        wall_strengths = find_nearest_one_or_two('concrete_strength', 'fc_number', group)
        if len(wall_strengths) == 2:
            wall_strength1, wall_strength2 = wall_strengths
        elif len(wall_strengths) == 1:
            wall_strength1 = wall_strengths[0]
        print('Wall Strength1:', wall_strength1, "kgf/cm2")
        print('Wall Strength2:', wall_strength2, "kgf/cm2")
    elif wall_strength1 != 0:
        print('Wall Strength 已存在')
    else:
        print(f'Wall Strength: 無法在 {name} 檔案中找到對應 pattern')

    print('-----------------------------------')
    # Stop early once every value is known.
    # NOTE(review): wall_strength2 stays 0 when only one wall strength exists,
    # in which case this never triggers and every file is scanned -- confirm
    # whether that is intended.
    if wall_strength1 != 0 and wall_strength2 != 0 and rebar_strength1 != 0 and rebar_strength2 != 0 and protection1 != 0 and protection2 != 0:
        break

print("Result:")
print('-----------------------------------')
print('Wall Strength1:', wall_strength1)
print('Wall Strength2:', wall_strength2)
print('Rebar Strength1:', rebar_strength1)
print('Rebar Strength2:', rebar_strength2)
print('Protection1:', protection1)
print('Protection2:', protection2)
    

F762SE0010.dwg
Protection1: 無法在檔案中找到對應資料
Protection2: 無法在檔案中找到對應資料
Rebar Strength1: 4200.0 kgf/cm2
Rebar Strength2: 2800.0 kgf/cm2
Wall Strength1: 350.0 kgf/cm2
Wall Strength2: 0 kgf/cm2
-----------------------------------
F762SE0020.dwg
Protection1: 75.0 mm
Protection2: 75.0 mm
Rebar Strength1 已存在
Rebar Strength2 已存在
Wall Strength 已存在
-----------------------------------
Result:
-----------------------------------
Wall Strength1: 350.0
Wall Strength2: 0
Rebar Strength1: 4200.0
Rebar Strength2: 2800.0
Protection1: 75.0
Protection2: 75.0
