In [14]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import unary_union
from shapely.validation import make_valid
from tqdm import tqdm
import time
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def find_neighbors(parcel, gdf):
    """Return the rows of ``gdf`` whose geometry touches ``parcel``'s geometry.

    Candidates are first narrowed down with the spatial index of ``gdf``
    (queried with the bounding box of the parcel geometry) and then filtered
    with the exact ``touches`` predicate.

    Args:
        parcel: A row-like object exposing a ``geometry`` attribute.
        gdf: The GeoDataFrame to search for neighbors.

    Returns:
        The subset of ``gdf`` that touches the parcel. If anything raises
        while searching, the error is logged and an empty ``GeoDataFrame``
        is returned instead.
    """
    try:
        target_geom = parcel.geometry
        # The spatial index yields positional indices, hence ``iloc`` below.
        candidate_positions = list(gdf.sindex.intersection(target_geom.bounds))
        candidates = gdf.iloc[candidate_positions]
        touching_mask = candidates.geometry.touches(target_geom)
        return candidates[touching_mask]
    except Exception as e:
        logging.error(f"在查找邻居时出错: {str(e)}")
        return gpd.GeoDataFrame()

def safe_union(geom1, geom2):
    """Union two geometries and return a cleaned-up result.

    The union is computed with ``unary_union``; if the result is not valid it
    is passed through ``make_valid``. ``buffer(0)`` is then applied in every
    case as a final topology clean-up.

    Returns:
        The merged geometry, or ``None`` if any step raised (the error is
        logged).
    """
    try:
        merged = unary_union([geom1, geom2])
        repaired = merged if merged.is_valid else make_valid(merged)
        # buffer(0) is used to repair possible remaining topology errors.
        return repaired.buffer(0)
    except Exception as e:
        logging.error(f"合并几何形状时出错: {str(e)}")
        return None

def calculate_total_area(gdf):
    """Return the sum of the areas of all geometries in ``gdf``."""
    areas = gdf.geometry.area
    return areas.sum()

def compare_areas(area1, area2, tolerance=1e-6):
    """Check whether two area values agree within an absolute tolerance.

    Args:
        area1: First area value.
        area2: Second area value.
        tolerance: Maximum allowed absolute difference between the two
            values (same unit as the areas).

    Returns:
        ``True`` when ``abs(area1 - area2) <= tolerance`` (an info message is
        logged), ``False`` otherwise (a warning with the difference is
        logged). A NaN on either side is reported as a mismatch.
    """
    diff = abs(area1 - area2)
    # Written as "not (diff <= tolerance)" rather than "diff > tolerance":
    # every comparison with NaN is False, so the latter would silently
    # report a NaN difference as "consistent".
    if not diff <= tolerance:
        logging.warning(f"面积不一致！差异: {diff}")
        return False
    logging.info("面积一致（在容忍范围内）")
    return True

def mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field):
    """Plan which large parcel each small parcel should be merged into.

    For every row of ``small_parcels`` the touching rows of ``large_parcels``
    are looked up with ``find_neighbors``. Among those neighbors, the ones
    whose ``dlmc_field`` value equals the small parcel's are preferred; if
    none match, all neighbors are considered. The candidate with the largest
    ``'area'`` value is chosen. Small parcels without any neighbor are left
    out of the plan.

    Args:
        gdf: Accepted for interface compatibility; not used here.
        small_parcels: Rows to be merged away.
        large_parcels: Rows that may absorb a small parcel; must carry an
            ``'area'`` column.
        dldm_field: Accepted for interface compatibility; not used here.
        dlmc_field: Column compared between a small parcel and its neighbors.

    Returns:
        A list of ``(small_index, large_index)`` index-label pairs.
    """
    planned = []
    progress = tqdm(small_parcels.iterrows(), total=len(small_parcels), desc="标记小面积图斑")
    for small_label, small_row in progress:
        touching = find_neighbors(small_row, large_parcels)
        if touching.empty:
            continue

        # Prefer neighbors sharing the same dlmc value; otherwise use them all.
        same_class = touching[touching[dlmc_field] == small_row[dlmc_field]]
        pool = touching if same_class.empty else same_class

        chosen = pool.loc[pool['area'].idxmax()]
        planned.append((small_label, chosen.name))

    return planned

def execute_merges(gdf, merge_operations, area_tolerance=1e-6):
    """Apply planned merges and return the frame without the absorbed rows.

    Operations are applied in order, so a large parcel that is the target of
    several operations accumulates every small parcel merged into it.

    Args:
        gdf: GeoDataFrame holding both the small and the large parcels. The
            ``'geometry'`` and ``'area'`` values of absorbing rows are
            updated in place on this frame.
        merge_operations: Iterable of ``(small_index, large_index)`` index
            labels; the small parcel is unioned into the large one.
        area_tolerance: A warning is logged when the area of a union differs
            from the sum of its two inputs by more than this amount. The
            default of 1e-6 corresponds to one square millimetre if the
            layer's unit is metres.

    Returns:
        ``(result, merged_count)`` where ``result`` is a new frame without
        the absorbed small parcels and ``merged_count`` is the number of
        operations that were applied. Operations whose labels are missing
        from ``gdf`` or whose union failed are skipped and counted in the log.
    """
    merged_count = 0
    skipped_count = 0
    total_area_change = 0
    # Labels of absorbed small parcels. Kept locally instead of in a
    # temporary column so the caller's frame is not given an extra column.
    absorbed = []

    for small_index, large_index in tqdm(merge_operations, desc="执行合并操作"):
        if small_index not in gdf.index or large_index not in gdf.index:
            skipped_count += 1
            continue

        small_geom = gdf.at[small_index, 'geometry']
        large_geom = gdf.at[large_index, 'geometry']

        original_area = small_geom.area + large_geom.area
        new_geometry = safe_union(small_geom, large_geom)

        if new_geometry is None:
            skipped_count += 1
            continue

        new_area = new_geometry.area
        area_change = new_area - original_area
        total_area_change += area_change

        if abs(area_change) > area_tolerance:
            logging.warning(f"合并操作导致显著的面积变化。原面积: {original_area}, 新面积: {new_area}, 差异: {area_change}")

        # Update the absorbing parcel. ``.at`` is the scalar setter, so a
        # multi-part union result is stored as one value and is never
        # interpreted as an iterable of values.
        gdf.at[large_index, 'geometry'] = new_geometry
        gdf.at[large_index, 'area'] = new_area

        absorbed.append(small_index)
        merged_count += 1

    # Remove the small parcels that were merged into a neighbor.
    gdf = gdf.drop(index=absorbed)

    logging.info(f"合并了 {merged_count} 个图斑，跳过了 {skipped_count} 个图斑")
    logging.info(f"总面积变化: {total_area_change}")

    return gdf, merged_count


def merge_small_parcels(input_shp, output_base, dldm_field, dlmc_field, thresholds, default_threshold=50, max_iterations=10):
    """Merge parcels below an area threshold into touching larger parcels.

    The shapefile is read, multi-part features are exploded into single
    parts, and then up to ``max_iterations`` rounds are run. In each round
    every parcel whose area is below the threshold of its ``dldm_field``
    value is merged into a touching parcel at or above its own threshold.
    After every round the total area is compared with the original total;
    processing stops without writing output if they differ.

    Args:
        input_shp: Path of the shapefile to read.
        output_base: Output path without extension; ``.shp`` is appended.
        dldm_field: Column whose value selects the area threshold of a row.
        dlmc_field: Column used to prefer neighbors of the same class.
        thresholds: Mapping from ``dldm_field`` value to area threshold.
        default_threshold: Threshold for values missing from ``thresholds``.
        max_iterations: Maximum number of merge rounds.

    Returns:
        None. On success the result is written to ``{output_base}.shp``; the
        helper ``'area'`` column added during processing is part of it. On a
        read error or an area mismatch the function logs and returns early.
    """
    start_time = time.time()
    logging.info(f"开始处理。输入Shapefile: {input_shp}")

    try:
        gdf = gpd.read_file(input_shp)
    except Exception as e:
        logging.error(f"读取shapefile时出错: {str(e)}")
        return

    # Areas are computed in CRS units; in a geographic CRS these are squared
    # degrees, so numeric area thresholds would not mean what they appear to.
    if gdf.crs is not None and gdf.crs.is_geographic:
        logging.warning("输入数据使用地理坐标系，面积以平方度为单位，面积阈值可能没有意义。")

    original_count = len(gdf)
    original_area = calculate_total_area(gdf)
    logging.info(f"读取了 {original_count} 个图斑，总面积: {original_area}")

    logging.info("正在将多部件要素转换为单部件...")
    gdf = gdf.explode(index_parts=True).reset_index(drop=True)
    logging.info(f"转换后共有 {len(gdf)} 个图斑")

    total_merged = 0

    for iteration in range(1, max_iterations + 1):
        logging.info(f"开始第 {iteration} 轮处理")

        gdf['area'] = gdf.geometry.area

        # Look up each row's threshold once, vectorised; values missing from
        # `thresholds` map to NaN and fall back to the default.
        row_thresholds = gdf[dldm_field].map(thresholds).fillna(default_threshold).astype(float)
        # Two separate comparisons (instead of negating one mask) so that a
        # row with a NaN area ends up in neither group.
        small_parcels = gdf[gdf['area'] < row_thresholds]
        large_parcels = gdf[gdf['area'] >= row_thresholds]

        logging.info(f"小面积图斑数量: {len(small_parcels)}, 大面积图斑数量: {len(large_parcels)}")

        merge_operations = mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field)
        gdf, merged_count = execute_merges(gdf, merge_operations)

        total_merged += merged_count

        current_area = calculate_total_area(gdf)
        logging.info(f"第 {iteration} 轮处理完成。本轮合并 {merged_count} 个图斑。当前总面积: {current_area}")
        if not compare_areas(original_area, current_area):
            logging.error("面积不一致，处理终止。")
            return

        if merged_count == 0:
            logging.info("没有更多图斑可以合并，处理结束。")
            break
    else:
        # The loop ended without a round that merged nothing, so small
        # parcels may remain.
        logging.warning(f"已达到最大迭代次数 {max_iterations}，可能仍有小面积图斑未合并。")

    logging.info(f"处理完成。总共合并 {total_merged} 个图斑。")

    final_area = calculate_total_area(gdf)
    logging.info(f"最终总面积: {final_area}")
    logging.info("比较原始面积和最终面积:")
    if not compare_areas(original_area, final_area):
        logging.error("最终面积与原始面积不一致，请检查处理过程。")
        return

    # Save the result as a shapefile; column names longer than 10 characters
    # are truncated to 10 before writing.
    output_shp = f"{output_base}.shp"
    result_truncated = gdf.rename(columns={col: col[:10] for col in gdf.columns if len(col) > 10})
    result_truncated.to_file(output_shp, encoding='utf-8')
    logging.info(f"结果保存为SHP（字段名已截断）: {output_shp}")

    end_time = time.time()
    logging.info(f"总处理时间: {(end_time - start_time) / 60:.2f} 分钟")
    logging.info(f"最终图斑数量: {len(gdf)}")
    logging.info(f"减少的图斑数量: {original_count - len(gdf)}")
# Example usage
dldm_field = "DLDM"
dlmc_field = "DLMC"
input_shp = r"C:\Users\Runker\Desktop\ele_test\sb_sd_mz.shp"
output_base = r"C:\Users\Runker\Desktop\ele_test\sb_sd_mz_result"
# Area threshold per DLDM value.
thresholds = {
    "01": 50,
    "02": 50,
    "03": 1000,
    "04": 1000,
}
# Default threshold for DLDM values not listed in `thresholds`.
default_threshold = 50

merge_small_parcels(
    input_shp=input_shp,
    output_base=output_base,
    dldm_field=dldm_field,
    dlmc_field=dlmc_field,
    thresholds=thresholds,
    default_threshold=default_threshold,
)

2024-09-04 22:38:49,328 - INFO - 开始处理。输入Shapefile: C:\Users\Runker\Desktop\ele_test\test.shp
2024-09-04 22:38:55,529 - INFO - 读取了 17708 个图斑，总面积: 176296810.78207093
2024-09-04 22:38:55,531 - INFO - 正在将多部件要素转换为单部件...
2024-09-04 22:38:55,603 - INFO - 转换后共有 17708 个图斑
2024-09-04 22:38:55,604 - INFO - 开始第 1 轮处理
2024-09-04 22:38:55,771 - INFO - 小面积图斑数量: 5064, 大面积图斑数量: 12644
标记小面积图斑: 100%|██████████| 5064/5064 [00:16<00:00, 303.44it/s]
执行合并操作: 100%|██████████| 5028/5028 [00:05<00:00, 981.47it/s] 
2024-09-04 22:39:17,587 - INFO - 合并了 5028 个图斑，跳过了 0 个图斑
2024-09-04 22:39:17,587 - INFO - 总面积变化: 5.481668097218062e-08
2024-09-04 22:39:17,587 - INFO - 第 1 轮处理完成。本轮合并 5028 个图斑。当前总面积: 176296810.782071
2024-09-04 22:39:17,587 - INFO - 面积一致（在容忍范围内）
2024-09-04 22:39:17,587 - INFO - 开始第 2 轮处理
2024-09-04 22:39:17,720 - INFO - 小面积图斑数量: 36, 大面积图斑数量: 12644
标记小面积图斑: 100%|██████████| 36/36 [00:00<00:00, 301.38it/s]
执行合并操作: 100%|██████████| 30/30 [00:00<00:00, 2173.33it/s]
2024-09-04 22:39:17,854 - INFO - 合并了 30 个