# 稳定快速

In [4]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import unary_union
from shapely.validation import make_valid
from tqdm import tqdm
import time
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def safe_union(geom1, geom2):
    """Union two geometries, repairing the result when it is invalid.

    Args:
        geom1: First geometry passed to ``unary_union``.
        geom2: Second geometry passed to ``unary_union``.

    Returns:
        The merged geometry (passed through ``make_valid`` if the raw union
        reports ``is_valid`` as false), or ``None`` if any step raised. The
        error is logged in that case rather than propagated.
    """
    try:
        merged = unary_union([geom1, geom2])
        return merged if merged.is_valid else make_valid(merged)
    except Exception as exc:
        logging.error(f"合并几何形状时出错: {exc!s}")
        return None

def find_neighbors(parcel, gdf):
    """Return the rows of ``gdf`` whose geometry touches ``parcel``'s geometry.

    Candidates are first narrowed with the spatial index of ``gdf`` using the
    parcel's bounding box, then filtered with an exact ``touches`` test.

    Args:
        parcel: Object exposing a ``geometry`` attribute (a row of a
            GeoDataFrame in this file).
        gdf: GeoDataFrame to search for neighbours.

    Returns:
        The subset of ``gdf`` touching the parcel. If anything raises, the
        error is logged and an empty ``GeoDataFrame`` is returned instead.
    """
    try:
        target_geom = parcel.geometry
        # The spatial index yields positional indices, hence ``iloc`` below.
        candidate_positions = list(gdf.sindex.intersection(target_geom.bounds))
        candidates = gdf.iloc[candidate_positions]
        touching_mask = candidates.geometry.touches(target_geom)
        return candidates[touching_mask]
    except Exception as exc:
        logging.error(f"在查找邻居时出错: {exc!s}")
        return gpd.GeoDataFrame()

def mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field):
    """Plan which small parcel should be merged into which large parcel.

    For every row of ``small_parcels`` the touching rows of ``large_parcels``
    are looked up. Only neighbours with the same ``dlmc_field`` value are
    eligible; among those the one with the greatest ``'area'`` is chosen.
    Small parcels with no eligible neighbour are left out of the plan.

    Args:
        gdf: Not used by this function; kept for interface compatibility.
        small_parcels: GeoDataFrame of parcels to be absorbed.
        large_parcels: GeoDataFrame of candidate targets; must have an
            ``'area'`` column.
        dldm_field: Not used by this function; kept for interface
            compatibility.
        dlmc_field: Column name whose value must match between a small parcel
            and its merge target.

    Returns:
        A list of ``(small_index_label, large_index_label)`` tuples.
    """
    planned = []
    for small_label, small_row in small_parcels.iterrows():
        touching = find_neighbors(small_row, large_parcels)
        if touching.empty:
            continue

        # Only merge into neighbours that share the same DLMC value.
        same_class = touching[touching[dlmc_field] == small_row[dlmc_field]]
        if same_class.empty:
            continue

        largest_label = same_class['area'].idxmax()
        chosen = same_class.loc[largest_label]
        planned.append((small_label, chosen.name))

    return planned

def execute_merges(gdf, merge_operations):
    """Apply planned merges and drop the parcels that were absorbed.

    For each ``(small_index, large_index)`` pair the two geometries are
    unioned with ``safe_union``; the result replaces the large parcel's
    geometry and its ``'area'`` value, and the small parcel is scheduled for
    removal. Pairs whose labels are missing from the index, or whose union
    fails, are skipped and counted.

    Note: geometry and ``'area'`` of target parcels are updated on the frame
    that was passed in; the returned frame is a new object without the
    absorbed rows. No helper column is added to the input frame.

    Args:
        gdf: GeoDataFrame with ``'geometry'`` and ``'area'`` columns.
        merge_operations: Iterable of ``(small_index, large_index)`` index
            labels.

    Returns:
        A tuple ``(gdf, merged_count)``: the frame without absorbed parcels
        and the number of merges that succeeded.
    """
    merged_count = 0
    skipped_count = 0
    # Labels of absorbed parcels; removed in one pass after the loop so that
    # no scratch column has to be written into the caller's frame.
    absorbed_labels = []

    for small_index, large_index in tqdm(merge_operations, desc="执行合并操作"):
        if small_index not in gdf.index or large_index not in gdf.index:
            skipped_count += 1
            logging.info(f"跳过合并操作：小图斑 {small_index} 或大图斑 {large_index} 不在索引中")
            continue

        # ``.at`` is the scalar accessor: a single cell is read/written and a
        # multi-part geometry is never interpreted as a list-like value.
        small_geom = gdf.at[small_index, 'geometry']
        large_geom = gdf.at[large_index, 'geometry']

        new_geometry = safe_union(small_geom, large_geom)
        if new_geometry is None:
            skipped_count += 1
            logging.info(f"跳过合并操作：无法合并图斑 {small_index} 和 {large_index}")
            continue

        gdf.at[large_index, 'geometry'] = new_geometry
        gdf.at[large_index, 'area'] = new_geometry.area
        absorbed_labels.append(small_index)
        merged_count += 1

    gdf = gdf.drop(index=absorbed_labels)

    logging.info(f"合并了 {merged_count} 个图斑，跳过了 {skipped_count} 个图斑")

    return gdf, merged_count

def merge_small_parcels(input_shp, output_base, dldm_field, dlmc_field, thresholds, default_threshold=50, max_iterations=10):
    """Merge under-threshold parcels into touching same-class neighbours and save the result.

    The input file is read, multi-part features are exploded to single parts,
    and then up to ``max_iterations`` rounds are run. In each round parcels
    whose ``geometry.area`` is below their threshold are planned for merging
    with ``mark_small_parcels_for_merging`` and merged with
    ``execute_merges``. The loop stops early when there are no small parcels,
    nothing can be planned, or a round merges nothing.

    The result is written once to ``output_base`` with column names longer
    than 10 characters truncated. The helper columns ``'area'`` and
    ``'threshold'`` are part of the written table.

    Args:
        input_shp: Path of the file to read with ``geopandas.read_file``.
        output_base: Path the result is written to.
        dldm_field: Column used to look up each parcel's area threshold.
        dlmc_field: Column that must match between a small parcel and the
            parcel it is merged into.
        thresholds: Mapping from ``dldm_field`` value to area threshold.
        default_threshold: Threshold for values missing from ``thresholds``.
        max_iterations: Upper bound on the number of merge rounds.

    Returns:
        None. If the input cannot be read the error is logged and the
        function returns early without writing anything.
    """
    start_time = time.time()
    logging.info(f"开始处理。输入Shapefile: {input_shp}")

    try:
        gdf = gpd.read_file(input_shp)
    except Exception as e:
        logging.error(f"读取shapefile时出错: {str(e)}")
        return

    original_crs = gdf.crs
    original_count = len(gdf)
    original_area = gdf.geometry.area.sum()
    logging.info(f"读取了 {original_count} 个图斑，总面积: {original_area}")
    logging.info(f"原始坐标系统: {original_crs}")

    logging.info("正在将多部件要素转换为单部件...")
    gdf = gdf.explode(index_parts=True).reset_index(drop=True)
    logging.info(f"转换后共有 {len(gdf)} 个图斑")

    iteration = 0
    total_merged = 0

    while iteration < max_iterations:
        iteration += 1
        logging.info(f"开始第 {iteration} 轮处理")

        gdf['area'] = gdf.geometry.area
        # One dictionary lookup per value of the DLDM column; this avoids
        # building a row object for every parcel as a row-wise apply would.
        gdf['threshold'] = [
            thresholds.get(code, default_threshold) for code in gdf[dldm_field]
        ]

        is_small = gdf['area'] < gdf['threshold']
        small_parcels = gdf[is_small]
        large_parcels = gdf[~is_small & (gdf['area'] >= gdf['threshold'])]

        logging.info(f"小面积图斑数量: {len(small_parcels)}, 大面积图斑数量: {len(large_parcels)}")
        logging.info(f"小面积图斑DLDM分布: {small_parcels[dldm_field].value_counts().to_dict()}")

        if len(small_parcels) == 0:
            logging.info("没有找到小面积图斑，处理结束。")
            break

        merge_operations = mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field)

        if not merge_operations:
            logging.info("没有可以合并的图斑，处理结束。")
            break

        gdf, merged_count = execute_merges(gdf, merge_operations)
        total_merged += merged_count

        current_area = gdf.geometry.area.sum()
        logging.info(f"第 {iteration} 轮处理完成。本轮合并 {merged_count} 个图斑。当前总面积: {current_area}")

        if merged_count == 0:
            logging.info("没有更多图斑可以合并，处理结束。")
            break

    logging.info(f"处理完成。总共合并 {total_merged} 个图斑。")

    final_area = gdf.geometry.area.sum()
    logging.info(f"最终总面积: {final_area}")
    logging.info(f"总面积变化: {final_area - original_area}")

    # Re-assert the input CRS on the result. Skipped when the input had no
    # CRS, because there is nothing to restore in that case.
    if original_crs is not None:
        gdf = gdf.set_crs(original_crs, allow_override=True)

    # Save the result once. Earlier revisions wrote the same path three times
    # and the final write (this same table) overwrote the others, so a single
    # write produces the same file.
    output_shp = f"{output_base}"
    result_truncated = gdf.rename(columns={col: col[:10] for col in gdf.columns if len(col) > 10})
    result_truncated.to_file(output_shp, encoding='utf-8')
    logging.info(f"结果已保存至: {output_shp}")
    logging.info(f"结果保存为SHP（字段名已截断）: {output_shp}")

    end_time = time.time()
    logging.info(f"总处理时间: {(end_time - start_time) / 60:.2f} 分钟")
    logging.info(f"最终图斑数量: {len(result_truncated)}")
    logging.info(f"减少的图斑数量: {original_count - len(result_truncated)}")

# Example usage
input_shp = r"C:\Users\Runker\Desktop\ele_sb\gl_merge_data_single_split_curve.shp"
output_base = r"C:\Users\Runker\Desktop\ele_sb\gl_ele_result.shp"
dldm_field = "DLDM"
dlmc_field = "DLMC"
# Area threshold per DLDM value; parcels below it are candidates for merging.
thresholds = {
    "01": 50,
    "02": 50,
    "03": 2000,
    "04": 2000,
}
default_threshold = 50  # Fallback threshold for DLDM values not listed above

merge_small_parcels(
    input_shp,
    output_base,
    dldm_field,
    dlmc_field,
    thresholds,
    default_threshold,
)

2024-11-01 17:42:45,456 - INFO - 开始处理。输入Shapefile: C:\Users\Runker\Desktop\ele_sb\sb_merge_data_single_split_curve_2.shp
2024-11-01 17:43:30,821 - INFO - 读取了 335668 个图斑，总面积: 1462965997.4097595
2024-11-01 17:43:30,838 - INFO - 原始坐标系统: EPSG:4545
2024-11-01 17:43:30,838 - INFO - 正在将多部件要素转换为单部件...
2024-11-01 17:43:32,422 - INFO - 转换后共有 337440 个图斑
2024-11-01 17:43:32,422 - INFO - 开始第 1 轮处理
2024-11-01 17:43:33,539 - INFO - 小面积图斑数量: 78616, 大面积图斑数量: 258824
2024-11-01 17:43:33,560 - INFO - 小面积图斑DLDM分布: {'03': 75624, '04': 2371, '01': 589, '02': 25, '12': 7}
执行合并操作: 100%|██████████| 59843/59843 [00:27<00:00, 2171.07it/s]
2024-11-01 17:46:33,774 - INFO - 合并了 59843 个图斑，跳过了 0 个图斑
2024-11-01 17:46:33,919 - INFO - 第 1 轮处理完成。本轮合并 59843 个图斑。当前总面积: 1462965997.4097598
2024-11-01 17:46:33,919 - INFO - 开始第 2 轮处理
2024-11-01 17:46:34,969 - INFO - 小面积图斑数量: 18773, 大面积图斑数量: 258824
2024-11-01 17:46:34,969 - INFO - 小面积图斑DLDM分布: {'03': 17661, '04': 1076, '01': 27, '02': 5, '12': 4}
执行合并操作: 100%|██████████| 3534/35