In [1]:
import pandas as pd
import csv
import re
import numpy as np

In [127]:
# Path of the CSV file that is handed to custom_csv_parser in this cell
# (relative path, so it is resolved against the kernel's working directory).
csv_path = 'lg09_diastru.csv'

# Custom CSV parser
def custom_csv_parser(file_path, encoding='ISO-8859-1'):
    """Read a CSV file into a list of rows holding at most eight fields each.

    Every field from the eighth onward is re-joined with ', ' into a single
    eighth field (presumably because that field is free text that can contain
    unquoted commas -- confirm against the data producer). Trailing commas
    and spaces are then stripped from the last field of every row.

    Blank lines are skipped. Rows with fewer than eight fields are returned
    with the fields they have.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A list of rows, each a list of strings, in file order (the first row
        of the file is included; nothing is treated as a header here).
    """
    rows = []
    # newline='' leaves newline handling to the csv module, as its
    # documentation requires for file objects passed to csv.reader.
    with open(file_path, encoding=encoding, newline='') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for a blank line; indexing row[-1]
                # below would raise IndexError on it.
                continue
            if len(row) > 8:
                # Collapse the overflow back into the eighth field.
                row[7:] = [', '.join(row[7:])]
            # Removes any run of ',' and ' ' characters at the end, e.g. the
            # residue left by empty trailing fields that were just joined.
            row[-1] = row[-1].rstrip(', ')
            rows.append(row)
    return rows

# Read the file with the custom parser
rows = custom_csv_parser(csv_path)

# Column names, one per field of a parsed row
columns = [
    "FileName",
    "EntityName",
    "ObjectType",
    "RotationAngle",
    "CentreCoor",
    "Height",
    "Width",
    "Text",
]

# Build the DataFrame from every parsed row except the first one
df = pd.DataFrame(data=rows[1:], columns=columns)

In [None]:
# Print the Text value of every row, one per line.
# The column is iterated directly so that the notebook-level names `rows`
# (parsed CSV rows) and `columns` (header list) are not rebound to the two
# integers of df.shape, and no per-row df.iloc lookup is needed.
for text in df['Text']:
    print(text)

In [128]:
# Regular expressions applied to the Text column.
# The three "number" patterns capture one run of digits each.
fy_pattern = r'^fy\s+(\d+)\s+kgf/cm'
mm_pattern = r'^(\d+)\s+mm'
# NOTE(review): '¡Ù' is not ASCII; it is presumably how a symbol in the file
# looks after being decoded as ISO-8859-1 (the default encoding of
# custom_csv_parser) -- confirm before changing the encoding or this pattern.
fc_pattern = r'^f\'c ¡Ù (\d+) kgf/cm'
# The remaining patterns only flag rows whose text contains a given phrase.
protection_pattern2 = r'DIAPHRAGM\s+WALLS\s+\(BOTH\s+FACES\)'
protection_pattern1 = r'CONCRETE\s+CAST\s+AGAINST\s+AND\s+PERMANENTLY\s+EXPOSED\s+TO\s+WATER,\s+SOIL,\s+BLINDING'
rebar_strength_pattern2 = r'REINFORCEMENT\s+OF\s+10\s+mm\s+OR\s+SMALLER\s+IN\s+DIAMETER\s+SHALL\s+CONFORM\s+TO\s+CNS\s+560'
rebar_strength_pattern1 = r'REINFORCEMENT\s+OF\s+13\s+mm\s+OR\s+LARGER\s+IN\s+DIAMETER\s+SHALL\s+CONFORM\s+TO\s+CNS\s+560'
concrete_strength_pattern = r'^DIAPHRAGM\s+WALLS$'

# Extracted numbers, one new column per pattern.
# expand=False returns a Series (one capture group) instead of a one-column
# DataFrame. The values are converted to float, not int, because rows whose
# text does not match hold NaN.
df['fy_number'] = df['Text'].str.extract(fy_pattern, expand=False).astype(float)
df['mm_number'] = df['Text'].str.extract(mm_pattern, expand=False).astype(float)
df['fc_number'] = df['Text'].str.extract(fc_pattern, expand=False).astype(float)

# Boolean flag columns. na=False makes a missing Text count as "no match" so
# the columns stay purely boolean and can be used directly as row masks.
df['rebar_protection2'] = df['Text'].str.contains(protection_pattern2, regex=True, na=False)
df['rebar_protection1'] = df['Text'].str.contains(protection_pattern1, regex=True, na=False)
df['rebar_strength1'] = df['Text'].str.contains(rebar_strength_pattern1, regex=True, na=False)
df['rebar_strength2'] = df['Text'].str.contains(rebar_strength_pattern2, regex=True, na=False)
df['concrete_strength'] = df['Text'].str.contains(concrete_strength_pattern, regex=True, na=False)

In [130]:
def parse_coordinate(coord_str):
    """Parse a coordinate string of the form '(x y z)' into three floats.

    The three numbers are separated by whitespace. Each may carry a sign,
    a decimal point and an exponent (e.g. '-12.5', '+3', '1e3'). Text after
    the closing parenthesis is ignored.

    Args:
        coord_str: The coordinate text, starting with '('.

    Returns:
        A list ``[x, y, z]`` of floats.

    Raises:
        ValueError: If ``coord_str`` does not start with three numbers
            enclosed in parentheses.
    """
    # Signed decimal with optional exponent; an unsigned-only pattern would
    # reject negative coordinates.
    number = r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
    match = re.match(
        r'\(\s*' + number + r'\s+' + number + r'\s+' + number + r'\s*\)',
        coord_str,
    )
    if match is None:
        raise ValueError(f"無法解析坐標: {coord_str}")
    return [float(component) for component in match.groups()]
    
def find_nearest_two(target_column: str, candidate_columns: str, df: pd.DataFrame):
    """Return the values of up to two candidates nearest to a target row.

    The target is the first row of ``df`` whose ``target_column`` is True.
    Candidates are the rows whose ``candidate_columns`` value is not NaN and
    whose x coordinate is strictly greater than the target's. Distance is the
    Euclidean distance over the first two components of the parsed
    ``CentreCoor`` values.

    Args:
        target_column: Name of the boolean column that flags the target row.
        candidate_columns: Name of the single column whose values are returned.
        df: Frame containing both columns and a ``CentreCoor`` column of
            strings accepted by ``parse_coordinate``.

    Returns:
        A list of zero, one or two ``candidate_columns`` values, nearest first.

    Raises:
        IndexError: If no row is flagged True in ``target_column``.
        ValueError: If a ``CentreCoor`` value cannot be parsed.
    """
    # Compare with True instead of masking with the column itself, so a flag
    # column that contains NaN/None does not make the boolean indexing fail.
    is_target = df[target_column].eq(True)
    if not is_target.any():
        raise IndexError(f"no row is flagged True in column {target_column!r}")
    target_row = df[is_target].iloc[0]
    candidate_rows = df[df[candidate_columns].notna()]

    target_coords = np.array(parse_coordinate(target_row['CentreCoor']))
    if candidate_rows.empty:
        # An empty list would become a 1-D array and break the 2-D slicing below.
        return []
    candidate_coords = np.array(
        [parse_coordinate(coord) for coord in candidate_rows['CentreCoor']]
    )

    # Offset along x and planar (x, y) distance from the target.
    x_diff = candidate_coords[:, 0] - target_coords[0]
    distances = np.sqrt(
        np.sum((candidate_coords[:, :2] - target_coords[:2]) ** 2, axis=1)
    )

    # Keep only candidates with a larger x coordinate than the target.
    valid_indices = x_diff > 0
    valid_distances = distances[valid_indices]
    valid_candidate_rows = candidate_rows[valid_indices]

    # Stable sort: candidates at equal distance keep their row order, so the
    # result is deterministic.
    nearest_indices = np.argsort(valid_distances, kind='stable')[:2]

    return [
        valid_candidate_rows.iloc[idx][candidate_columns]
        for idx in nearest_indices
    ]

# Nearest two mm_number values for the rebar_protection1 row; the closest is printed
protection1 = find_nearest_two(target_column='rebar_protection1', candidate_columns='mm_number', df=df)
print(f'The protection1 :{protection1[0]}')

# Nearest two mm_number values for the rebar_protection2 row; the closest is printed
protection2 = find_nearest_two(target_column='rebar_protection2', candidate_columns='mm_number', df=df)
print(f'The protection2 :{protection2[0]}')

# Nearest two fy_number values for the rebar_strength1 row; the closest is printed
rebar_strength1 = find_nearest_two(target_column='rebar_strength1', candidate_columns='fy_number', df=df)
print(f'The rebar_strength1 :{rebar_strength1[0]}')

# Nearest two fy_number values for the rebar_strength2 row; the closest is printed
rebar_strength2 = find_nearest_two(target_column='rebar_strength2', candidate_columns='fy_number', df=df)
print(f'The rebar_strength2 :{rebar_strength2[0]}')

# Nearest two fc_number values for the concrete_strength row; both are printed
concrete_strength = find_nearest_two(target_column='concrete_strength', candidate_columns='fc_number', df=df)
print(f'The concrete_strength :{concrete_strength}')


The protection1 :75.0
The protection2 :75.0
The rebar_strength1 :4200.0
The rebar_strength2 :2800.0
The concrete_strength :[350.0, 245.0]
