In [5]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import unary_union
from shapely.validation import make_valid
from tqdm import tqdm
import time
import logging

# Log INFO and above, each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def find_neighbors(parcel, gdf):
    """Return the rows of ``gdf`` whose geometry touches ``parcel``'s geometry.

    The spatial index of ``gdf`` is queried with the parcel's bounding box to
    obtain candidate rows; only candidates for which ``touches`` is true are
    kept.

    Args:
        parcel: Row-like object exposing a ``geometry`` attribute.
        gdf: GeoDataFrame to search.

    Returns:
        The touching subset of ``gdf``.  If any exception is raised the error
        is logged and an empty ``GeoDataFrame`` is returned instead.
    """
    try:
        bbox = parcel.geometry.bounds
        candidate_positions = list(gdf.sindex.intersection(bbox))
        candidates = gdf.iloc[candidate_positions]
        touching_mask = candidates.geometry.touches(parcel.geometry)
        return candidates[touching_mask]
    except Exception as e:
        logging.error(f"在查找邻居时出错: {str(e)}")
        return gpd.GeoDataFrame()

def safe_union(geom1, geom2):
    """Union two geometries and return a repaired result.

    Args:
        geom1: First geometry.
        geom2: Second geometry.

    Returns:
        The union passed through ``buffer(0)`` (after ``make_valid`` when the
        raw union is invalid), or ``None`` if any step raises; the error is
        logged in that case.
    """
    try:
        merged = unary_union([geom1, geom2])
        merged = merged if merged.is_valid else make_valid(merged)
        # buffer(0) is applied to repair possible topology errors.
        repaired = merged.buffer(0)
        return repaired
    except Exception as e:
        logging.error(f"合并几何形状时出错: {str(e)}")
        return None

def calculate_total_area(gdf):
    """Return the sum of the areas of all geometries in ``gdf``."""
    per_feature_area = gdf.geometry.area
    return per_feature_area.sum()

def compare_areas(area1, area2, tolerance=1e-6):
    """Check whether two area totals agree within an absolute tolerance.

    Args:
        area1: First area value.
        area2: Second area value.
        tolerance: Largest absolute difference still treated as equal.

    Returns:
        ``True`` when ``abs(area1 - area2) <= tolerance`` (an info message is
        logged), otherwise ``False`` (a warning with the difference is
        logged).  A NaN difference counts as a mismatch.
    """
    diff = abs(area1 - area2)
    # Test the "within tolerance" side: every comparison with NaN is False, so
    # a NaN difference falls through to the mismatch branch instead of being
    # silently reported as consistent.
    if diff <= tolerance:
        logging.info("面积一致（在容忍范围内）")
        return True
    logging.warning(f"面积不一致！差异: {diff}")
    return False

def mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field):
    """Choose, for each small parcel, the large neighbour it should merge into.

    Neighbours are looked up with ``find_neighbors`` among ``large_parcels``.
    Neighbours whose ``dlmc_field`` value equals the small parcel's are
    preferred; if there are none, all neighbours are considered.  Within the
    chosen group the row with the largest ``'area'`` value wins.  Small
    parcels without any neighbour are left out.

    Args:
        gdf: Not used by this function.
        small_parcels: Rows to be merged away.
        large_parcels: Candidate merge targets; must have an ``'area'`` column.
        dldm_field: Not used by this function.
        dlmc_field: Column compared between a small parcel and its neighbours.

    Returns:
        List of ``(small_index_label, large_index_label)`` tuples.
    """
    merge_operations = []
    progress = tqdm(small_parcels.iterrows(), total=len(small_parcels), desc="标记小面积图斑")
    for small_label, small_row in progress:
        candidates = find_neighbors(small_row, large_parcels)
        if candidates.empty:
            continue

        same_class = candidates[candidates[dlmc_field] == small_row[dlmc_field]]
        # Fall back to every neighbour when none shares the same class value.
        pool = candidates if same_class.empty else same_class
        target = pool.loc[pool['area'].idxmax()]

        merge_operations.append((small_label, target.name))

    return merge_operations

def execute_merges(gdf, merge_operations):
    """Apply ``(small, large)`` merge pairs to ``gdf`` and drop the absorbed rows.

    For each pair the two geometries are unioned with ``safe_union``; the
    large row receives the merged geometry and its new ``'area'``, and the
    small row is flagged for deletion.  Pairs are processed in order, so a
    large parcel that appears several times keeps growing.  Pairs whose labels
    are missing from the index, or whose union fails, are skipped.

    Note that ``gdf`` itself is modified (a ``'to_delete'`` column is added and
    target rows are overwritten); the returned frame is a filtered copy
    without that column.

    Args:
        gdf: GeoDataFrame with ``'geometry'`` and ``'area'`` columns.
        merge_operations: Iterable of ``(small_index, large_index)`` labels.

    Returns:
        Tuple ``(remaining_gdf, merged_count)``.
    """
    merged_count = 0
    skipped_count = 0
    total_area_change = 0

    # Deletion flag: every row starts out as "keep".
    gdf['to_delete'] = False

    for small_index, large_index in tqdm(merge_operations, desc="执行合并操作"):
        both_present = small_index in gdf.index and large_index in gdf.index
        if not both_present:
            skipped_count += 1
            continue

        small_geom = gdf.loc[small_index, 'geometry']
        large_geom = gdf.loc[large_index, 'geometry']
        original_area = small_geom.area + large_geom.area

        new_geometry = safe_union(small_geom, large_geom)
        if new_geometry is None:
            skipped_count += 1
            continue

        new_area = new_geometry.area
        area_change = new_area - original_area

        # Allow an error of 1e-6 area units (1 square millimetre, assuming the
        # CRS unit is metres) before warning.
        if abs(area_change) > 1e-6:
            logging.warning(f"合并操作导致显著的面积变化。原面积: {original_area}, 新面积: {new_area}, 差异: {area_change}")
        total_area_change += area_change

        # The large parcel absorbs the small one ...
        gdf.loc[large_index, 'geometry'] = new_geometry
        gdf.loc[large_index, 'area'] = new_area
        # ... and the small one is flagged for removal.
        gdf.loc[small_index, 'to_delete'] = True

        merged_count += 1

    # Remove the merged-away small parcels together with the helper column.
    keep_mask = ~gdf['to_delete'].astype(bool)
    gdf = gdf[keep_mask].drop(columns=['to_delete'])

    logging.info(f"合并了 {merged_count} 个图斑，跳过了 {skipped_count} 个图斑")
    logging.info(f"总面积变化: {total_area_change}")

    return gdf, merged_count


def merge_small_parcels(input_shp, output_base, dldm_field, dlmc_field, thresholds, default_threshold=50, max_iterations=10):
    """Merge undersized parcels of a shapefile into touching larger parcels and save the result.

    The file is read, multi-part features are exploded into single parts, and
    up to ``max_iterations`` rounds are run.  Each round recomputes the
    ``'area'`` column, splits the rows into "small" (area below the threshold
    of the row's ``dldm_field`` value) and "large" (area at or above it),
    obtains merge pairs from ``mark_small_parcels_for_merging`` and applies
    them with ``execute_merges``.  The total area is compared with the
    original after every round and once more at the end; on a mismatch the
    function returns without writing any output.

    Args:
        input_shp: Path of the shapefile to read.
        output_base: Output path without extension; ``.shp`` is appended.
        dldm_field: Column whose value selects the area threshold.
        dlmc_field: Column forwarded to ``mark_small_parcels_for_merging``.
        thresholds: Mapping from ``dldm_field`` value to area threshold.
        default_threshold: Threshold for values missing from ``thresholds``.
        max_iterations: Maximum number of merge rounds.

    Returns:
        None.  Progress and results are reported through ``logging``; the
        merged layer is written to ``f"{output_base}.shp"``.
    """
    start_time = time.time()
    logging.info(f"开始处理。输入Shapefile: {input_shp}")

    try:
        gdf = gpd.read_file(input_shp)
    except Exception as e:
        logging.error(f"读取shapefile时出错: {str(e)}")
        return

    # Counted before exploding multi-part features, so the "reduction" logged
    # at the end is relative to the input file and can be negative.
    original_count = len(gdf)
    original_area = calculate_total_area(gdf)
    logging.info(f"读取了 {original_count} 个图斑，总面积: {original_area}")

    logging.info("正在将多部件要素转换为单部件...")
    gdf = gdf.explode(index_parts=True).reset_index(drop=True)
    logging.info(f"转换后共有 {len(gdf)} 个图斑")

    iteration = 0
    total_merged = 0

    while iteration < max_iterations:
        iteration += 1
        logging.info(f"开始第 {iteration} 轮处理")

        gdf['area'] = gdf.geometry.area

        # One threshold per row, looked up once per round on the code column
        # alone; this replaces two row-wise DataFrame.apply passes and also
        # yields a proper (empty) boolean mask when the frame has no rows.
        row_thresholds = gdf[dldm_field].map(lambda code: thresholds.get(code, default_threshold))
        small_parcels = gdf[gdf['area'] < row_thresholds]
        large_parcels = gdf[gdf['area'] >= row_thresholds]

        logging.info(f"小面积图斑数量: {len(small_parcels)}, 大面积图斑数量: {len(large_parcels)}")

        merge_operations = mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field)
        gdf, merged_count = execute_merges(gdf, merge_operations)

        total_merged += merged_count

        current_area = calculate_total_area(gdf)
        logging.info(f"第 {iteration} 轮处理完成。本轮合并 {merged_count} 个图斑。当前总面积: {current_area}")
        if not compare_areas(original_area, current_area):
            # Abort without saving: merging must not change the total area.
            logging.error("面积不一致，处理终止。")
            return

        if merged_count == 0:
            logging.info("没有更多图斑可以合并，处理结束。")
            break

    logging.info(f"处理完成。总共合并 {total_merged} 个图斑。")

    final_area = calculate_total_area(gdf)
    logging.info(f"最终总面积: {final_area}")
    logging.info("比较原始面积和最终面积:")
    if not compare_areas(original_area, final_area):
        logging.error("最终面积与原始面积不一致，请检查处理过程。")
        return

    # Save as SHP; column names longer than 10 characters are truncated.
    output_shp = f"{output_base}.shp"
    result_truncated = gdf.rename(columns={col: col[:10] for col in gdf.columns if len(col) > 10})
    result_truncated.to_file(output_shp, encoding='utf-8')
    logging.info(f"结果保存为SHP（字段名已截断）: {output_shp}")

    end_time = time.time()
    logging.info(f"总处理时间: {(end_time - start_time) / 60:.2f} 分钟")
    logging.info(f"最终图斑数量: {len(gdf)}")
    logging.info(f"减少的图斑数量: {original_count - len(gdf)}")
# Usage example
dldm_field = "DLDM"
dlmc_field = "DLMC"
default_threshold = 50  # default threshold for DLDM types not listed in `thresholds`
thresholds = {"01": 50, "02": 50, "03": 1000, "04": 1000}
input_shp = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data.shp"
output_base = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data_result_3"

merge_small_parcels(
    input_shp=input_shp,
    output_base=output_base,
    dldm_field=dldm_field,
    dlmc_field=dlmc_field,
    thresholds=thresholds,
    default_threshold=default_threshold,
)

2024-09-05 11:05:42,374 - INFO - 开始处理。输入Shapefile: C:\Users\Runker\Desktop\ele_test\sb_merge_data.shp
2024-09-05 11:06:32,904 - INFO - 读取了 165557 个图斑，总面积: 1463055810.020485
2024-09-05 11:06:32,904 - INFO - 正在将多部件要素转换为单部件...
2024-09-05 11:06:34,472 - INFO - 转换后共有 256241 个图斑
2024-09-05 11:06:34,472 - INFO - 开始第 1 轮处理
2024-09-05 11:06:37,188 - INFO - 小面积图斑数量: 77118, 大面积图斑数量: 179123
标记小面积图斑: 100%|██████████| 77118/77118 [02:55<00:00, 439.22it/s]
执行合并操作: 100%|██████████| 76052/76052 [01:01<00:00, 1235.44it/s]
2024-09-05 11:10:34,565 - INFO - 合并了 76052 个图斑，跳过了 0 个图斑
2024-09-05 11:10:34,566 - INFO - 总面积变化: 3.7359129123615276e-08
2024-09-05 11:10:34,697 - INFO - 第 1 轮处理完成。本轮合并 76052 个图斑。当前总面积: 1463055810.0204852
2024-09-05 11:10:34,698 - INFO - 面积一致（在容忍范围内）
2024-09-05 11:10:34,698 - INFO - 开始第 2 轮处理
2024-09-05 11:10:37,254 - INFO - 小面积图斑数量: 1066, 大面积图斑数量: 179123
标记小面积图斑: 100%|██████████| 1066/1066 [00:02<00:00, 372.70it/s]
执行合并操作: 100%|██████████| 672/672 [00:00<00:00, 1231.51it/s]
2024-09-05 

In [4]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import unary_union
from shapely.validation import make_valid
from shapely.geometry import Polygon, MultiPolygon
from shapely.affinity import scale  # 添加这行来导入 scale 函数
from tqdm import tqdm
import time
import logging

# Log INFO and above, each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def safe_union(geom1, geom2):
    """Union two geometries and return a repaired result.

    Args:
        geom1: First geometry.
        geom2: Second geometry.

    Returns:
        The union passed through ``buffer(0)`` (after ``make_valid`` when the
        raw union is invalid), or ``None`` if any step raises; the error is
        logged in that case.
    """
    try:
        merged = unary_union([geom1, geom2])
        merged = merged if merged.is_valid else make_valid(merged)
        # buffer(0) is applied to repair possible topology errors.
        repaired = merged.buffer(0)
        return repaired
    except Exception as e:
        logging.error(f"合并几何形状时出错: {str(e)}")
        return None

def adjust_geometry(geometry, target_area):
    """Uniformly rescale ``geometry`` about its centroid so its area becomes ``target_area``.

    Both axes are scaled by ``sqrt(target_area / current_area)``, which
    multiplies the area by ``target_area / current_area``.

    Args:
        geometry: Geometry exposing an ``area`` attribute.
        target_area: Area the result should have.

    Returns:
        The rescaled geometry.  When the current area is zero, negative or
        NaN no scale factor can be derived; a warning is logged and
        ``geometry`` is returned unchanged.
    """
    current_area = geometry.area
    # `not (x > 0)` also catches NaN; a zero area would otherwise raise
    # ZeroDivisionError below and abort the whole merge run.
    if not current_area > 0:
        logging.warning(f"无法调整几何形状：当前面积为 {current_area}，保持原样")
        return geometry
    scale_factor = np.sqrt(target_area / current_area)
    return scale(geometry, xfact=scale_factor, yfact=scale_factor, origin='centroid')

def execute_merges(gdf, merge_operations, area_change_threshold=1.0):
    """Apply ``(small, large)`` merge pairs to ``gdf`` and drop the absorbed rows.

    For each pair the two geometries are unioned with ``safe_union``.  If the
    union's area differs from the sum of the two input areas by more than
    ``area_change_threshold``, the union is rescaled with ``adjust_geometry``
    towards that sum.  The large row then receives the geometry and its area,
    and the small row is flagged for deletion.  Pairs whose labels are missing
    from the index, or whose union fails, are skipped.

    Note that ``gdf`` itself is modified (a ``'to_delete'`` column is added and
    target rows are overwritten); the returned frame is a filtered copy
    without that column.

    Args:
        gdf: GeoDataFrame with ``'geometry'`` and ``'area'`` columns.
        merge_operations: Iterable of ``(small_index, large_index)`` labels.
        area_change_threshold: Absolute area difference that triggers rescaling.

    Returns:
        Tuple ``(remaining_gdf, merged_count, total_area_change)``.
    """
    merged_count = 0
    skipped_count = 0
    total_area_change = 0
    area_changes = []

    # Deletion flag: every row starts out as "keep".
    gdf['to_delete'] = False

    for small_index, large_index in tqdm(merge_operations, desc="执行合并操作"):
        both_present = small_index in gdf.index and large_index in gdf.index
        if not both_present:
            skipped_count += 1
            continue

        small_geom = gdf.loc[small_index, 'geometry']
        large_geom = gdf.loc[large_index, 'geometry']
        original_area = small_geom.area + large_geom.area

        new_geometry = safe_union(small_geom, large_geom)
        if new_geometry is None:
            skipped_count += 1
            continue

        new_area = new_geometry.area
        area_change = new_area - original_area

        if abs(area_change) > area_change_threshold:
            # Rescale the union towards the combined input area and re-measure.
            new_geometry = adjust_geometry(new_geometry, original_area)
            new_area = new_geometry.area
            area_change = new_area - original_area
            logging.info(f"调整了合并后的几何形状。原面积: {original_area}, 调整后面积: {new_area}, 剩余差异: {area_change}")

        area_changes.append(area_change)
        total_area_change += area_change

        gdf.loc[large_index, 'geometry'] = new_geometry
        gdf.loc[large_index, 'area'] = new_area
        gdf.loc[small_index, 'to_delete'] = True

        merged_count += 1

    keep_mask = ~gdf['to_delete']
    gdf = gdf[keep_mask].drop(columns=['to_delete'])

    logging.info(f"合并了 {merged_count} 个图斑，跳过了 {skipped_count} 个图斑")
    logging.info(f"总面积变化: {total_area_change}")
    # Max/mean statistics over `area_changes` are intentionally not logged here.

    return gdf, merged_count, total_area_change

def merge_small_parcels(input_shp, output_base, dldm_field, dlmc_field, thresholds, default_threshold=50, max_iterations=10, area_change_threshold=1.0):
    """Merge undersized parcels of a shapefile into touching larger parcels and save the result.

    The file is read, multi-part features are exploded into single parts, and
    up to ``max_iterations`` rounds are run.  Each round recomputes the
    ``'area'`` column, splits the rows into "small" (area below the threshold
    of the row's ``dldm_field`` value) and "large" (area at or above it),
    obtains merge pairs from ``mark_small_parcels_for_merging`` and applies
    them with ``execute_merges``.  The loop stops early when a round merges
    nothing.  The result is always written, and area drift is only logged.

    Args:
        input_shp: Path of the shapefile to read.
        output_base: Output path without extension; ``.shp`` is appended.
        dldm_field: Column whose value selects the area threshold.
        dlmc_field: Column forwarded to ``mark_small_parcels_for_merging``.
        thresholds: Mapping from ``dldm_field`` value to area threshold.
        default_threshold: Threshold for values missing from ``thresholds``.
        max_iterations: Maximum number of merge rounds.
        area_change_threshold: Forwarded to ``execute_merges``.

    Returns:
        None.  Progress and results are reported through ``logging``; the
        merged layer is written to ``f"{output_base}.shp"``.
    """
    start_time = time.time()
    logging.info(f"开始处理。输入Shapefile: {input_shp}")

    try:
        gdf = gpd.read_file(input_shp)
    except Exception as e:
        logging.error(f"读取shapefile时出错: {str(e)}")
        return

    # Counted before exploding multi-part features, so the "reduction" logged
    # at the end is relative to the input file and can be negative.
    original_count = len(gdf)
    original_area = gdf.geometry.area.sum()
    logging.info(f"读取了 {original_count} 个图斑，总面积: {original_area}")

    logging.info("正在将多部件要素转换为单部件...")
    gdf = gdf.explode(index_parts=True).reset_index(drop=True)
    logging.info(f"转换后共有 {len(gdf)} 个图斑")

    iteration = 0
    total_merged = 0
    total_area_change = 0

    while iteration < max_iterations:
        iteration += 1
        logging.info(f"开始第 {iteration} 轮处理")

        gdf['area'] = gdf.geometry.area

        # One threshold per row, looked up once per round on the code column
        # alone; this replaces two row-wise DataFrame.apply passes and also
        # yields a proper (empty) boolean mask when the frame has no rows.
        row_thresholds = gdf[dldm_field].map(lambda code: thresholds.get(code, default_threshold))
        small_parcels = gdf[gdf['area'] < row_thresholds]
        large_parcels = gdf[gdf['area'] >= row_thresholds]

        logging.info(f"小面积图斑数量: {len(small_parcels)}, 大面积图斑数量: {len(large_parcels)}")

        merge_operations = mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field)
        gdf, merged_count, area_change = execute_merges(gdf, merge_operations, area_change_threshold)

        total_merged += merged_count
        total_area_change += area_change

        current_area = gdf.geometry.area.sum()
        logging.info(f"第 {iteration} 轮处理完成。本轮合并 {merged_count} 个图斑。当前总面积: {current_area}")
        logging.info(f"累计面积变化: {total_area_change}")

        if merged_count == 0:
            logging.info("没有更多图斑可以合并，处理结束。")
            break

    logging.info(f"处理完成。总共合并 {total_merged} 个图斑。")

    final_area = gdf.geometry.area.sum()
    logging.info(f"最终总面积: {final_area}")
    logging.info(f"总面积变化: {final_area - original_area}")

    # Save as SHP; column names longer than 10 characters are truncated.
    output_shp = f"{output_base}.shp"
    result_truncated = gdf.rename(columns={col: col[:10] for col in gdf.columns if len(col) > 10})
    result_truncated.to_file(output_shp, encoding='utf-8')
    logging.info(f"结果保存为SHP（字段名已截断）: {output_shp}")

    end_time = time.time()
    logging.info(f"总处理时间: {(end_time - start_time) / 60:.2f} 分钟")
    logging.info(f"最终图斑数量: {len(gdf)}")
    logging.info(f"减少的图斑数量: {original_count - len(gdf)}")

# Usage example
dldm_field = "DLDM"
dlmc_field = "DLMC"
default_threshold = 50  # default threshold for DLDM types not listed in `thresholds`
area_change_threshold = 1.0  # area-change threshold (square metres)
thresholds = {"01": 50, "02": 50, "03": 1000, "04": 1000}
input_shp = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data.shp"
output_base = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data_result_2"

merge_small_parcels(
    input_shp=input_shp,
    output_base=output_base,
    dldm_field=dldm_field,
    dlmc_field=dlmc_field,
    thresholds=thresholds,
    default_threshold=default_threshold,
    area_change_threshold=area_change_threshold,
)

2024-09-05 10:53:26,863 - INFO - 开始处理。输入Shapefile: C:\Users\Runker\Desktop\ele_test\sb_merge_data.shp
2024-09-05 10:54:37,563 - INFO - 读取了 165557 个图斑，总面积: 1463055810.020485
2024-09-05 10:54:37,564 - INFO - 正在将多部件要素转换为单部件...
2024-09-05 10:54:40,051 - INFO - 转换后共有 256241 个图斑
2024-09-05 10:54:40,052 - INFO - 开始第 1 轮处理
2024-09-05 10:54:43,755 - INFO - 小面积图斑数量: 77118, 大面积图斑数量: 179123
2024-09-05 10:54:43,992 - INFO - 图斑 42 没有找到邻居，跳过
2024-09-05 10:54:43,995 - INFO - 图斑 43 没有找到邻居，跳过
2024-09-05 10:54:43,998 - INFO - 图斑 44 没有找到邻居，跳过
2024-09-05 10:54:44,205 - INFO - 图斑 287 没有找到邻居，跳过
2024-09-05 10:54:44,284 - INFO - 图斑 397 没有找到邻居，跳过
2024-09-05 10:54:45,849 - INFO - 图斑 2209 没有找到邻居，跳过
2024-09-05 10:54:45,918 - INFO - 图斑 2296 没有找到邻居，跳过
2024-09-05 10:54:45,948 - INFO - 图斑 2312 没有找到邻居，跳过
2024-09-05 10:54:45,975 - INFO - 图斑 2340 没有找到邻居，跳过
2024-09-05 10:54:46,830 - INFO - 图斑 3272 没有找到邻居，跳过
2024-09-05 10:54:47,319 - INFO - 图斑 3804 没有找到邻居，跳过
2024-09-05 10:54:47,321 - INFO - 图斑 3805 没有找到邻居，跳过
2024-09-05 10:

In [3]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import unary_union
from shapely.validation import make_valid
from shapely.geometry import Polygon, MultiPolygon
from shapely.affinity import scale
from tqdm import tqdm
import time
import logging

# Log INFO and above, each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def safe_union(geom1, geom2):
    """Union two geometries and return a repaired result.

    Args:
        geom1: First geometry.
        geom2: Second geometry.

    Returns:
        The union passed through ``buffer(0)`` (after ``make_valid`` when the
        raw union is invalid), or ``None`` if any step raises; the error is
        logged in that case.
    """
    try:
        merged = unary_union([geom1, geom2])
        merged = merged if merged.is_valid else make_valid(merged)
        # buffer(0) is applied to repair possible topology errors.
        repaired = merged.buffer(0)
        return repaired
    except Exception as e:
        logging.error(f"合并几何形状时出错: {str(e)}")
        return None

def adjust_geometry(geometry, target_area):
    """Uniformly rescale ``geometry`` about its centroid so its area becomes ``target_area``.

    Both axes are scaled by ``sqrt(target_area / current_area)``, which
    multiplies the area by ``target_area / current_area``.

    Args:
        geometry: Geometry exposing an ``area`` attribute.
        target_area: Area the result should have.

    Returns:
        The rescaled geometry.  When the current area is zero, negative or
        NaN no scale factor can be derived; a warning is logged and
        ``geometry`` is returned unchanged.
    """
    current_area = geometry.area
    # `not (x > 0)` also catches NaN; a zero area would otherwise raise
    # ZeroDivisionError below and abort the whole merge run.
    if not current_area > 0:
        logging.warning(f"无法调整几何形状：当前面积为 {current_area}，保持原样")
        return geometry
    scale_factor = np.sqrt(target_area / current_area)
    return scale(geometry, xfact=scale_factor, yfact=scale_factor, origin='centroid')

def find_neighbors(parcel, gdf):
    """Return the rows of ``gdf`` whose geometry touches ``parcel``'s geometry.

    The spatial index of ``gdf`` is queried with the parcel's bounding box to
    obtain candidate rows; only candidates for which ``touches`` is true are
    kept.

    Args:
        parcel: Row-like object exposing a ``geometry`` attribute.
        gdf: GeoDataFrame to search.

    Returns:
        The touching subset of ``gdf``.  If any exception is raised the error
        is logged and an empty ``GeoDataFrame`` is returned instead.
    """
    try:
        bbox = parcel.geometry.bounds
        candidate_positions = list(gdf.sindex.intersection(bbox))
        candidates = gdf.iloc[candidate_positions]
        touching_mask = candidates.geometry.touches(parcel.geometry)
        return candidates[touching_mask]
    except Exception as e:
        logging.error(f"在查找邻居时出错: {str(e)}")
        return gpd.GeoDataFrame()

def mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field):
    """Choose, for each small parcel, the large neighbour it should merge into.

    Neighbours are looked up with ``find_neighbors`` among ``large_parcels``.
    Neighbours whose ``dlmc_field`` value equals the small parcel's are
    preferred; if there are none, all neighbours are considered.  Within the
    chosen group the row with the largest ``'area'`` value wins.  Small
    parcels without any neighbour are logged and left out.

    Args:
        gdf: Not used by this function.
        small_parcels: Rows to be merged away.
        large_parcels: Candidate merge targets; must have an ``'area'`` column.
        dldm_field: Not used by this function.
        dlmc_field: Column compared between a small parcel and its neighbours.

    Returns:
        List of ``(small_index_label, large_index_label)`` tuples.
    """
    merge_operations = []
    for index, small_row in small_parcels.iterrows():
        candidates = find_neighbors(small_row, large_parcels)
        if candidates.empty:
            logging.info(f"图斑 {index} 没有找到邻居，跳过")
            continue

        same_class = candidates[candidates[dlmc_field] == small_row[dlmc_field]]
        # Fall back to every neighbour when none shares the same class value.
        pool = candidates if same_class.empty else same_class
        target = pool.loc[pool['area'].idxmax()]

        merge_operations.append((index, target.name))

    return merge_operations

def execute_merges(gdf, merge_operations, area_change_threshold=1.0):
    """Apply ``(small, large)`` merge pairs to ``gdf`` and drop the absorbed rows.

    For each pair the two geometries are unioned with ``safe_union``.  If the
    union's area differs from the sum of the two input areas by more than
    ``area_change_threshold``, the union is rescaled with ``adjust_geometry``
    towards that sum.  The large row then receives the geometry and its area,
    and the small row is flagged for deletion.  Pairs whose labels are missing
    from the index, or whose union fails, are logged and skipped.

    Note that ``gdf`` itself is modified (a ``'to_delete'`` column is added and
    target rows are overwritten); the returned frame is a filtered copy
    without that column.

    Args:
        gdf: GeoDataFrame with ``'geometry'`` and ``'area'`` columns.
        merge_operations: Iterable of ``(small_index, large_index)`` labels.
        area_change_threshold: Absolute area difference that triggers rescaling.

    Returns:
        Tuple ``(remaining_gdf, merged_count, total_area_change)``.
    """
    merged_count = 0
    skipped_count = 0
    total_area_change = 0
    area_changes = []

    # Deletion flag: every row starts out as "keep".
    gdf['to_delete'] = False

    for small_index, large_index in tqdm(merge_operations, desc="执行合并操作"):
        both_present = small_index in gdf.index and large_index in gdf.index
        if not both_present:
            skipped_count += 1
            logging.info(f"跳过合并操作：小图斑 {small_index} 或大图斑 {large_index} 不在索引中")
            continue

        small_geom = gdf.loc[small_index, 'geometry']
        large_geom = gdf.loc[large_index, 'geometry']
        original_area = small_geom.area + large_geom.area

        new_geometry = safe_union(small_geom, large_geom)
        if new_geometry is None:
            skipped_count += 1
            logging.info(f"跳过合并操作：无法合并图斑 {small_index} 和 {large_index}")
            continue

        new_area = new_geometry.area
        area_change = new_area - original_area

        if abs(area_change) > area_change_threshold:
            # Rescale the union towards the combined input area and re-measure.
            new_geometry = adjust_geometry(new_geometry, original_area)
            new_area = new_geometry.area
            area_change = new_area - original_area
            logging.info(f"调整了合并后的几何形状。原面积: {original_area}, 调整后面积: {new_area}, 剩余差异: {area_change}")

        area_changes.append(area_change)
        total_area_change += area_change

        gdf.loc[large_index, 'geometry'] = new_geometry
        gdf.loc[large_index, 'area'] = new_area
        gdf.loc[small_index, 'to_delete'] = True

        merged_count += 1

    keep_mask = ~gdf['to_delete']
    gdf = gdf[keep_mask].drop(columns=['to_delete'])

    logging.info(f"合并了 {merged_count} 个图斑，跳过了 {skipped_count} 个图斑")
    logging.info(f"总面积变化: {total_area_change}")

    # Statistics are only meaningful when at least one merge happened.
    if not area_changes:
        logging.info("没有执行任何合并操作")
    else:
        logging.info(f"最大面积变化: {max(abs(change) for change in area_changes)}")
        logging.info(f"平均面积变化: {sum(abs(change) for change in area_changes) / len(area_changes)}")

    return gdf, merged_count, total_area_change

def merge_small_parcels(input_shp, output_base, dldm_field, dlmc_field, thresholds, default_threshold=50, max_iterations=10, area_change_threshold=1.0):
    """Merge undersized parcels of a shapefile into touching larger parcels and save the result.

    The file is read, multi-part features are exploded into single parts, and
    up to ``max_iterations`` rounds are run.  Each round recomputes the
    ``'area'`` column, splits the rows into "small" (area below the threshold
    of the row's ``dldm_field`` value) and "large" (area at or above it),
    obtains merge pairs from ``mark_small_parcels_for_merging`` and applies
    them with ``execute_merges``.  The loop stops early when there are no
    small parcels, no merge pairs, or a round merges nothing.  The result is
    always written, and area drift is only logged.

    Per-row thresholds are kept in a local Series rather than a column, so no
    ``'threshold'`` field is added to the saved layer.

    Args:
        input_shp: Path of the shapefile to read.
        output_base: Output path without extension; ``.shp`` is appended.
        dldm_field: Column whose value selects the area threshold.
        dlmc_field: Column forwarded to ``mark_small_parcels_for_merging``.
        thresholds: Mapping from ``dldm_field`` value to area threshold.
        default_threshold: Threshold for missing (NaN) values and for values
            not present in ``thresholds``.
        max_iterations: Maximum number of merge rounds.
        area_change_threshold: Forwarded to ``execute_merges``.

    Returns:
        None.  Progress and results are reported through ``logging``; the
        merged layer is written to ``f"{output_base}.shp"``.
    """
    def get_threshold(dldm):
        # Missing or unlisted codes fall back to the default threshold.
        if pd.isna(dldm) or dldm not in thresholds:
            return default_threshold
        return thresholds[dldm]

    start_time = time.time()
    logging.info(f"开始处理。输入Shapefile: {input_shp}")

    try:
        gdf = gpd.read_file(input_shp)
    except Exception as e:
        logging.error(f"读取shapefile时出错: {str(e)}")
        return

    # Counted before exploding multi-part features, so the "reduction" logged
    # at the end is relative to the input file and can be negative.
    original_count = len(gdf)
    original_area = gdf.geometry.area.sum()
    logging.info(f"读取了 {original_count} 个图斑，总面积: {original_area}")

    # Report the distinct values of the DLDM field.
    unique_dldm = gdf[dldm_field].unique()
    logging.info(f"DLDM字段的唯一值: {unique_dldm}")

    logging.info("正在将多部件要素转换为单部件...")
    gdf = gdf.explode(index_parts=True).reset_index(drop=True)
    logging.info(f"转换后共有 {len(gdf)} 个图斑")

    iteration = 0
    total_merged = 0
    total_area_change = 0

    while iteration < max_iterations:
        iteration += 1
        logging.info(f"开始第 {iteration} 轮处理")

        gdf['area'] = gdf.geometry.area

        # Thresholds live in a local Series (not a `threshold` column) so the
        # scratch values are not written to the output file and an input
        # attribute of that name is not clobbered.  Mapping the code column
        # alone also avoids a row-wise DataFrame.apply.
        row_thresholds = gdf[dldm_field].map(get_threshold)
        small_parcels = gdf[gdf['area'] < row_thresholds]
        large_parcels = gdf[gdf['area'] >= row_thresholds]

        logging.info(f"小面积图斑数量: {len(small_parcels)}, 大面积图斑数量: {len(large_parcels)}")
        logging.info(f"小面积图斑DLDM分布: {small_parcels[dldm_field].value_counts().to_dict()}")

        if len(small_parcels) == 0:
            logging.info("没有找到小面积图斑，处理结束。")
            break

        merge_operations = mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field)

        if not merge_operations:
            logging.info("没有可以合并的图斑，处理结束。")
            break

        gdf, merged_count, area_change = execute_merges(gdf, merge_operations, area_change_threshold)

        total_merged += merged_count
        total_area_change += area_change

        current_area = gdf.geometry.area.sum()
        logging.info(f"第 {iteration} 轮处理完成。本轮合并 {merged_count} 个图斑。当前总面积: {current_area}")
        logging.info(f"累计面积变化: {total_area_change}")

        if merged_count == 0:
            logging.info("没有更多图斑可以合并，处理结束。")
            break

    logging.info(f"处理完成。总共合并 {total_merged} 个图斑。")

    final_area = gdf.geometry.area.sum()
    logging.info(f"最终总面积: {final_area}")
    logging.info(f"总面积变化: {final_area - original_area}")

    # Save as SHP; column names longer than 10 characters are truncated.
    output_shp = f"{output_base}.shp"
    result_truncated = gdf.rename(columns={col: col[:10] for col in gdf.columns if len(col) > 10})
    result_truncated.to_file(output_shp, encoding='utf-8')
    logging.info(f"结果保存为SHP（字段名已截断）: {output_shp}")

    end_time = time.time()
    logging.info(f"总处理时间: {(end_time - start_time) / 60:.2f} 分钟")
    logging.info(f"最终图斑数量: {len(gdf)}")
    logging.info(f"减少的图斑数量: {original_count - len(gdf)}")

# Usage example
dldm_field = "DLDM"
dlmc_field = "DLMC"
default_threshold = 50  # default threshold for DLDM types not listed in `thresholds`
area_change_threshold = 1.0  # area-change threshold (square metres)
thresholds = {"01": 50, "02": 50, "03": 1000, "04": 1000}
input_shp = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data.shp"
output_base = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data_result_1"

merge_small_parcels(
    input_shp=input_shp,
    output_base=output_base,
    dldm_field=dldm_field,
    dlmc_field=dlmc_field,
    thresholds=thresholds,
    default_threshold=default_threshold,
    area_change_threshold=area_change_threshold,
)

2024-09-05 10:46:59,648 - INFO - 开始处理。输入Shapefile: C:\Users\Runker\Desktop\ele_test\sb_merge_data.shp
2024-09-05 10:47:50,017 - INFO - 读取了 165557 个图斑，总面积: 1463055810.020485
2024-09-05 10:47:50,044 - INFO - DLDM字段的唯一值: ['03' '01' '12' '02' '04']
2024-09-05 10:47:50,044 - INFO - 正在将多部件要素转换为单部件...
2024-09-05 10:47:52,136 - INFO - 转换后共有 256241 个图斑
2024-09-05 10:47:52,136 - INFO - 开始第 1 轮处理
2024-09-05 10:47:53,259 - INFO - 小面积图斑数量: 77118, 大面积图斑数量: 179123
2024-09-05 10:47:53,259 - INFO - 小面积图斑DLDM分布: {'03': 53156, '01': 20733, '04': 2061, '02': 747, '12': 421}
2024-09-05 10:47:53,465 - INFO - 图斑 42 没有找到邻居，跳过
2024-09-05 10:47:53,465 - INFO - 图斑 43 没有找到邻居，跳过
2024-09-05 10:47:53,465 - INFO - 图斑 44 没有找到邻居，跳过
2024-09-05 10:47:53,600 - INFO - 图斑 287 没有找到邻居，跳过
2024-09-05 10:47:53,681 - INFO - 图斑 397 没有找到邻居，跳过
2024-09-05 10:47:54,899 - INFO - 图斑 2209 没有找到邻居，跳过
2024-09-05 10:47:54,950 - INFO - 图斑 2296 没有找到邻居，跳过
2024-09-05 10:47:54,966 - INFO - 图斑 2312 没有找到邻居，跳过
2024-09-05 10:47:54,999 - INFO - 图斑 234

In [None]:
# 稳定快速


In [6]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import unary_union
from shapely.validation import make_valid
from tqdm import tqdm
import time
import logging

# Log INFO and above, each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def safe_union(geom1, geom2):
    """Union two geometries, repairing the result if it is invalid.

    Args:
        geom1: First geometry.
        geom2: Second geometry.

    Returns:
        The union (passed through ``make_valid`` when the raw union is
        invalid), or ``None`` if any step raises; the error is logged in that
        case.
    """
    try:
        merged = unary_union([geom1, geom2])
        return merged if merged.is_valid else make_valid(merged)
    except Exception as e:
        logging.error(f"合并几何形状时出错: {str(e)}")
        return None

def find_neighbors(parcel, gdf):
    """Return the rows of ``gdf`` whose geometry touches ``parcel``'s geometry.

    The spatial index of ``gdf`` is queried with the parcel's bounding box to
    obtain candidate rows; only candidates for which ``touches`` is true are
    kept.

    Args:
        parcel: Row-like object exposing a ``geometry`` attribute.
        gdf: GeoDataFrame to search.

    Returns:
        The touching subset of ``gdf``.  If any exception is raised the error
        is logged and an empty ``GeoDataFrame`` is returned instead.
    """
    try:
        bbox = parcel.geometry.bounds
        candidate_positions = list(gdf.sindex.intersection(bbox))
        candidates = gdf.iloc[candidate_positions]
        touching_mask = candidates.geometry.touches(parcel.geometry)
        return candidates[touching_mask]
    except Exception as e:
        logging.error(f"在查找邻居时出错: {str(e)}")
        return gpd.GeoDataFrame()

def mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field):
    """Choose, for each small parcel, the same-class large neighbour to merge into.

    Neighbours are looked up with ``find_neighbors`` among ``large_parcels``
    and restricted to those whose ``dlmc_field`` value equals the small
    parcel's.  Among them the row with the largest ``'area'`` value is chosen.
    Small parcels with no neighbour, or no neighbour of the same class, are
    silently left out.

    Args:
        gdf: Not used by this function.
        small_parcels: Rows to be merged away.
        large_parcels: Candidate merge targets; must have an ``'area'`` column.
        dldm_field: Not used by this function.
        dlmc_field: Column compared between a small parcel and its neighbours.

    Returns:
        List of ``(small_index_label, large_index_label)`` tuples.
    """
    merge_operations = []
    for small_label, small_row in small_parcels.iterrows():
        candidates = find_neighbors(small_row, large_parcels)
        if candidates.empty:
            continue

        same_class = candidates[candidates[dlmc_field] == small_row[dlmc_field]]
        if same_class.empty:
            # Only parcels sharing the same DLMC value are merged.
            continue

        target = same_class.loc[same_class['area'].idxmax()]
        merge_operations.append((small_label, target.name))

    return merge_operations

def execute_merges(gdf, merge_operations):
    """Apply ``(small, large)`` merge pairs to ``gdf`` and drop the absorbed rows.

    For each pair the two geometries are unioned with ``safe_union``; the
    large row receives the merged geometry and its area, and the small row is
    flagged for deletion.  Pairs whose labels are missing from the index, or
    whose union fails, are logged and skipped.

    Note that ``gdf`` itself is modified (a ``'to_delete'`` column is added and
    target rows are overwritten); the returned frame is a filtered copy
    without that column.

    Args:
        gdf: GeoDataFrame with ``'geometry'`` and ``'area'`` columns.
        merge_operations: Iterable of ``(small_index, large_index)`` labels.

    Returns:
        Tuple ``(remaining_gdf, merged_count)``.
    """
    merged_count = 0
    skipped_count = 0

    # Deletion flag: every row starts out as "keep".
    gdf['to_delete'] = False

    for small_index, large_index in tqdm(merge_operations, desc="执行合并操作"):
        both_present = small_index in gdf.index and large_index in gdf.index
        if not both_present:
            skipped_count += 1
            logging.info(f"跳过合并操作：小图斑 {small_index} 或大图斑 {large_index} 不在索引中")
            continue

        merged_geom = safe_union(
            gdf.loc[small_index, 'geometry'],
            gdf.loc[large_index, 'geometry'],
        )
        if merged_geom is None:
            skipped_count += 1
            logging.info(f"跳过合并操作：无法合并图斑 {small_index} 和 {large_index}")
            continue

        gdf.loc[large_index, 'geometry'] = merged_geom
        gdf.loc[large_index, 'area'] = merged_geom.area
        gdf.loc[small_index, 'to_delete'] = True

        merged_count += 1

    keep_mask = ~gdf['to_delete']
    gdf = gdf[keep_mask].drop(columns=['to_delete'])

    logging.info(f"合并了 {merged_count} 个图斑，跳过了 {skipped_count} 个图斑")

    return gdf, merged_count

def merge_small_parcels(input_shp, output_base, dldm_field, dlmc_field, thresholds, default_threshold=50, max_iterations=10):
    """Merge undersized parcels of a shapefile into touching larger parcels and save the result.

    The file is read, multi-part features are exploded into single parts, and
    up to ``max_iterations`` rounds are run.  Each round recomputes the
    ``'area'`` column, splits the rows into "small" (area below the threshold
    of the row's ``dldm_field`` value) and "large" (area at or above it),
    obtains merge pairs from ``mark_small_parcels_for_merging`` and applies
    them with ``execute_merges``.  The loop stops early when there are no
    small parcels, no merge pairs, or a round merges nothing.  The CRS read
    from the input is re-applied before saving.

    Per-row thresholds are kept in a local Series rather than a column, so no
    ``'threshold'`` field is added to the saved layer.

    Args:
        input_shp: Path of the shapefile to read.
        output_base: Output path without extension; ``.shp`` is appended.
        dldm_field: Column whose value selects the area threshold.
        dlmc_field: Column forwarded to ``mark_small_parcels_for_merging``.
        thresholds: Mapping from ``dldm_field`` value to area threshold.
        default_threshold: Threshold for values missing from ``thresholds``.
        max_iterations: Maximum number of merge rounds.

    Returns:
        None.  Progress and results are reported through ``logging``; the
        merged layer is written to ``f"{output_base}.shp"``.
    """
    start_time = time.time()
    logging.info(f"开始处理。输入Shapefile: {input_shp}")

    try:
        gdf = gpd.read_file(input_shp)
    except Exception as e:
        logging.error(f"读取shapefile时出错: {str(e)}")
        return

    original_crs = gdf.crs
    # Counted before exploding multi-part features, so the "reduction" logged
    # at the end is relative to the input file and can be negative.
    original_count = len(gdf)
    original_area = gdf.geometry.area.sum()
    logging.info(f"读取了 {original_count} 个图斑，总面积: {original_area}")
    logging.info(f"原始坐标系统: {original_crs}")

    logging.info("正在将多部件要素转换为单部件...")
    gdf = gdf.explode(index_parts=True).reset_index(drop=True)
    logging.info(f"转换后共有 {len(gdf)} 个图斑")

    iteration = 0
    total_merged = 0

    while iteration < max_iterations:
        iteration += 1
        logging.info(f"开始第 {iteration} 轮处理")

        gdf['area'] = gdf.geometry.area

        # Thresholds live in a local Series (not a `threshold` column) so the
        # scratch values are not written to the output file and an input
        # attribute of that name is not clobbered.  Mapping the code column
        # alone also avoids a row-wise DataFrame.apply.
        row_thresholds = gdf[dldm_field].map(lambda dldm: thresholds.get(dldm, default_threshold))
        small_parcels = gdf[gdf['area'] < row_thresholds]
        large_parcels = gdf[gdf['area'] >= row_thresholds]

        logging.info(f"小面积图斑数量: {len(small_parcels)}, 大面积图斑数量: {len(large_parcels)}")
        logging.info(f"小面积图斑DLDM分布: {small_parcels[dldm_field].value_counts().to_dict()}")

        if len(small_parcels) == 0:
            logging.info("没有找到小面积图斑，处理结束。")
            break

        merge_operations = mark_small_parcels_for_merging(gdf, small_parcels, large_parcels, dldm_field, dlmc_field)

        if not merge_operations:
            logging.info("没有可以合并的图斑，处理结束。")
            break

        gdf, merged_count = execute_merges(gdf, merge_operations)

        total_merged += merged_count

        current_area = gdf.geometry.area.sum()
        logging.info(f"第 {iteration} 轮处理完成。本轮合并 {merged_count} 个图斑。当前总面积: {current_area}")

        if merged_count == 0:
            logging.info("没有更多图斑可以合并，处理结束。")
            break

    logging.info(f"处理完成。总共合并 {total_merged} 个图斑。")

    final_area = gdf.geometry.area.sum()
    logging.info(f"最终总面积: {final_area}")
    logging.info(f"总面积变化: {final_area - original_area}")

    # Make sure the coordinate reference system is unchanged.  Skipped when
    # the input had no CRS: there is nothing to restore, and set_crs(None)
    # is rejected by some geopandas versions.
    if original_crs is not None:
        gdf = gdf.set_crs(original_crs, allow_override=True)

    # Save as SHP; column names longer than 10 characters are truncated.
    output_shp = f"{output_base}.shp"
    result_truncated = gdf.rename(columns={col: col[:10] for col in gdf.columns if len(col) > 10})
    result_truncated.to_file(output_shp, encoding='utf-8')
    logging.info(f"结果保存为SHP（字段名已截断）: {output_shp}")

    end_time = time.time()
    logging.info(f"总处理时间: {(end_time - start_time) / 60:.2f} 分钟")
    logging.info(f"最终图斑数量: {len(gdf)}")
    logging.info(f"减少的图斑数量: {original_count - len(gdf)}")

# Usage example
dldm_field = "DLDM"
dlmc_field = "DLMC"
default_threshold = 50  # default threshold for DLDM types not listed in `thresholds`
thresholds = {"01": 50, "02": 50, "03": 50, "04": 50}
input_shp = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data_result.shp"
output_base = r"C:\Users\Runker\Desktop\ele_test\sb_merge_data_result_result"

merge_small_parcels(
    input_shp=input_shp,
    output_base=output_base,
    dldm_field=dldm_field,
    dlmc_field=dlmc_field,
    thresholds=thresholds,
    default_threshold=default_threshold,
)

2024-09-05 11:15:03,501 - INFO - 开始处理。输入Shapefile: C:\Users\Runker\Desktop\ele_test\sb_merge_data_result.shp
2024-09-05 11:16:00,659 - INFO - 读取了 186150 个图斑，总面积: 1463055810.020485
2024-09-05 11:16:00,659 - INFO - 原始坐标系统: EPSG:4545
2024-09-05 11:16:00,659 - INFO - 正在将多部件要素转换为单部件...
2024-09-05 11:16:02,413 - INFO - 转换后共有 188099 个图斑
2024-09-05 11:16:02,413 - INFO - 开始第 1 轮处理
2024-09-05 11:16:03,294 - INFO - 小面积图斑数量: 1971, 大面积图斑数量: 186128
2024-09-05 11:16:03,295 - INFO - 小面积图斑DLDM分布: {'03': 1043, '01': 805, '04': 86, '02': 23, '12': 14}
执行合并操作: 100%|██████████| 1955/1955 [00:00<00:00, 2432.94it/s]
2024-09-05 11:16:08,601 - INFO - 合并了 1955 个图斑，跳过了 0 个图斑
2024-09-05 11:16:08,676 - INFO - 第 1 轮处理完成。本轮合并 1955 个图斑。当前总面积: 1463055810.020485
2024-09-05 11:16:08,676 - INFO - 开始第 2 轮处理
2024-09-05 11:16:09,558 - INFO - 小面积图斑数量: 16, 大面积图斑数量: 186128
2024-09-05 11:16:09,558 - INFO - 小面积图斑DLDM分布: {'12': 7, '03': 5, '01': 4}
执行合并操作: 100%|██████████| 5/5 [00:00<00:00, 317.52it/s]
2024-09-05 11:16:09,836 - I