In [1]:
import arcpy
from arcpy import env
import arcpy
import traceback
import multiprocessing
import os
from tqdm import tqdm

In [2]:
# Default workspace for every arcpy call in this notebook that uses a bare dataset name.
# NOTE(review): machine-specific absolute path - point it at your own geodatabase before running.
env.workspace = r'C:\Users\Runker\Desktop\DEM_test\multi.gdb'

In [None]:
def merge_small_parcels(input_fc, output_fc, land_type_field, dz_field, thresholds, output_gdb=None, num_processes=4):
    """Merge undersized parcels of ``input_fc`` and write the result to ``output_fc``.

    Land types that contain at least one parcel smaller than its threshold are
    dealt round-robin into groups; each group is handled by
    ``process_dlmc_types`` and the resulting parts are appended into a single
    feature class.

    Args:
        input_fc: Input polygon feature class (name or path).
        output_fc: Name of the feature class to create in ``output_gdb``.
        land_type_field: Field holding the land type used for grouping.
        dz_field: Field whose value selects the area threshold.
        thresholds: Mapping of ``dz_field`` value -> area threshold; values
            missing from the mapping use the smallest threshold.
        output_gdb: Target workspace; defaults to ``arcpy.env.workspace``.
        num_processes: Upper bound on the number of worker processes.

    Returns:
        None. Errors are caught, printed with a traceback, and not re-raised.
    """
    try:
        import sys  # local import: only needed for the interactive-session check below

        if not thresholds:
            raise ValueError("thresholds 不能为空")

        # Fall back to the current workspace when no output geodatabase is given.
        if output_gdb is None:
            output_gdb = arcpy.env.workspace

        # A spawned worker starts with a fresh arcpy environment, so a bare dataset
        # name that relies on this process's workspace would not resolve there.
        input_path = arcpy.Describe(input_fc).catalogPath
        final_fc = os.path.join(output_gdb, output_fc)

        # Count the undersized parcels per land type.
        default_threshold = min(thresholds.values())
        dlmc_type_counts = {}
        with arcpy.da.SearchCursor(input_path, [land_type_field, dz_field, "SHAPE@AREA"]) as cursor:
            for land_type, dz, area in cursor:
                if area is not None and area < thresholds.get(dz, default_threshold):
                    dlmc_type_counts[land_type] = dlmc_type_counts.get(land_type, 0) + 1

        if not dlmc_type_counts:
            print("没有需要合并的小图斑。")
            return

        # NOTE(review): land types without any undersized parcel are never dispatched,
        # so their features do not reach the output - confirm this is intended.
        dlmc_types_sorted = sorted(dlmc_type_counts, key=dlmc_type_counts.get, reverse=True)

        # Round-robin the types over the groups; never create an empty group, because
        # a worker with no types would build an empty where clause (= all features).
        group_count = max(1, min(num_processes, len(dlmc_types_sorted)))
        groups = [dlmc_types_sorted[i::group_count] for i in range(group_count)]
        tasks = [
            (input_path, os.path.join(output_gdb, f"{output_fc}_part{i}"), land_type_field, dz_field, thresholds, group)
            for i, group in enumerate(groups)
        ]

        # Without fork, child processes must import the worker from a real module file.
        # A function defined in a notebook/interactive session cannot be imported, and
        # the pool would wait forever, so run the groups in this process instead.
        worker_module = sys.modules.get(process_dlmc_types.__module__)
        can_use_pool = (
            multiprocessing.get_start_method() == "fork"
            or getattr(worker_module, "__file__", None) is not None
        )
        if can_use_pool and len(tasks) > 1:
            # The context manager tears the pool down even when a worker raises.
            with multiprocessing.Pool(processes=len(tasks)) as pool:
                part_paths = pool.starmap(process_dlmc_types, tasks)
        else:
            part_paths = [process_dlmc_types(*task) for task in tasks]

        # A worker signals failure by returning None; refuse to build a partial result.
        failed = sum(1 for part in part_paths if not part)
        if failed:
            raise RuntimeError(f"{failed} 个子任务处理失败，未生成最终结果")

        # The first part only supplies the schema here, so every part (including
        # the first) still has to be appended afterwards.
        arcpy.CreateFeatureclass_management(output_gdb, output_fc, "POLYGON", part_paths[0], "DISABLED", "DISABLED", input_path)
        for part in part_paths:
            arcpy.Append_management(part, final_fc, "NO_TEST")

        print(f"操作完成。结果保存在要素类: {os.path.join(output_gdb, output_fc)}")

    except Exception as e:
        print(f"发生错误: {str(e)}")
        print(traceback.format_exc())


def process_dlmc_types(input_fc, output_fc_part, land_type_field, dz_field, thresholds, dlmc_types):
    """Copy the parcels of the given land types and merge the undersized ones.

    Each parcel whose area is below its threshold is unioned into the largest
    boundary-touching parcel of the same land type, then removed.

    Args:
        input_fc: Path of the source polygon feature class.
        output_fc_part: Full path of the part feature class to write.
        land_type_field: Field holding the land type.
        dz_field: Field whose value selects the area threshold.
        thresholds: Mapping of ``dz_field`` value -> area threshold; values
            missing from the mapping use the smallest threshold.
        dlmc_types: Land-type values this call is responsible for.

    Returns:
        ``output_fc_part`` on success, ``None`` when any step failed (the
        error and traceback are printed).
    """
    # Derived from the part name so several parts can be processed in one process.
    layer_name = f"{os.path.basename(output_fc_part)}_layer"
    layer_created = False
    try:
        output_gdb = os.path.dirname(output_fc_part)
        default_threshold = min(thresholds.values())

        # Build the selection: quote-escaped IN list, NULL handled separately, and
        # "1 = 0" for an empty type list (an empty clause would select everything).
        field_sql = arcpy.AddFieldDelimiters(input_fc, land_type_field)
        quoted = ", ".join("'{}'".format(str(t).replace("'", "''")) for t in dlmc_types if t is not None)
        clauses = []
        if quoted:
            clauses.append(f"{field_sql} IN ({quoted})")
        if any(t is None for t in dlmc_types):
            clauses.append(f"{field_sql} IS NULL")
        where_clause = " OR ".join(clauses) if clauses else "1 = 0"

        # Select_analysis creates the output itself; a leftover part from an earlier
        # run has to go first or the tool fails when overwriting is disabled.
        if arcpy.Exists(output_fc_part):
            arcpy.Delete_management(output_fc_part)
        arcpy.Select_analysis(input_fc, output_fc_part, where_clause)

        # Make sure the area field exists (field names compare case-insensitively).
        if "area" not in {f.name.lower() for f in arcpy.ListFields(output_fc_part)}:
            arcpy.AddField_management(output_fc_part, "Area", "DOUBLE")
        arcpy.CalculateField_management(output_fc_part, "Area", "!shape.area!", "PYTHON3")

        oid_field = arcpy.Describe(output_fc_part).OIDFieldName
        arcpy.MakeFeatureLayer_management(output_fc_part, layer_name)
        layer_created = True

        # Collect the candidates up front; geometry is re-read when each is processed.
        small_parcels = []
        with arcpy.da.SearchCursor(output_fc_part, ["OID@", land_type_field, dz_field, "Area"]) as cursor:
            for oid, land_type, dz, area in cursor:
                threshold = thresholds.get(dz, default_threshold)
                if area is not None and area < threshold:
                    small_parcels.append((oid, land_type, threshold))

        with arcpy.da.Editor(output_gdb):
            for oid, land_type, threshold in small_parcels:
                # Re-read the parcel: an earlier merge may have enlarged it past the threshold.
                shape = None
                with arcpy.da.SearchCursor(output_fc_part, ["SHAPE@", "Area"], f"{oid_field} = {oid}") as current_cursor:
                    for current_shape, current_area in current_cursor:
                        if current_area is not None and current_area < threshold:
                            shape = current_shape
                if shape is None:
                    continue

                # Select the parcels sharing a boundary with this one.
                arcpy.SelectLayerByLocation_management(layer_name, "BOUNDARY_TOUCHES", shape)

                # Largest selected neighbour of the same land type.
                max_area = 0
                max_oid = None
                with arcpy.da.SearchCursor(layer_name, ["OID@", land_type_field, "Area"]) as neighbor_cursor:
                    for n_oid, n_type, n_area in neighbor_cursor:
                        if n_oid != oid and n_type == land_type and n_area is not None and n_area > max_area:
                            max_area, max_oid = n_area, n_oid

                if max_oid is not None:
                    # Grow the neighbour first; drop the small parcel only once that worked.
                    merged = False
                    with arcpy.da.UpdateCursor(output_fc_part, ["SHAPE@", "Area"], f"{oid_field} = {max_oid}") as keep_cursor:
                        for keep_row in keep_cursor:
                            merged_shape = keep_row[0].union(shape)
                            # Keep "Area" current so later comparisons see the merged size.
                            keep_cursor.updateRow([merged_shape, merged_shape.area])
                            merged = True
                    if merged:
                        with arcpy.da.UpdateCursor(output_fc_part, ["OID@"], f"{oid_field} = {oid}") as drop_cursor:
                            for _ in drop_cursor:
                                drop_cursor.deleteRow()

                # Clear the selection before the next parcel.
                arcpy.SelectLayerByAttribute_management(layer_name, "CLEAR_SELECTION")

        # Hand the part's path back to the caller.
        return output_fc_part

    except Exception as e:
        print(f"发生错误: {str(e)}")
        print(traceback.format_exc())
        return None

    finally:
        if layer_created:
            try:
                arcpy.Delete_management(layer_name)
            except Exception:
                pass  # best-effort cleanup of the in-memory layer only

In [3]:
def merge_small_parcels(input_fc, output_fc, land_type_field, dz_field, thresholds, output_gdb=None, num_processes=4):
    """Merge undersized parcels of ``input_fc`` into ``output_fc``, showing tqdm progress.

    Land types that contain at least one parcel smaller than its threshold are
    dealt round-robin into groups; each group is handled by
    ``process_dlmc_types`` and the resulting parts are appended into a single
    feature class.

    Args:
        input_fc: Input polygon feature class (name or path).
        output_fc: Name of the feature class to create in ``output_gdb``.
        land_type_field: Field holding the land type used for grouping.
        dz_field: Field whose value selects the area threshold.
        thresholds: Mapping of ``dz_field`` value -> area threshold; values
            missing from the mapping use the smallest threshold.
        output_gdb: Target workspace; defaults to ``arcpy.env.workspace``.
        num_processes: Upper bound on the number of worker processes.

    Returns:
        None. Errors are caught, printed with a traceback, and not re-raised.
    """
    try:
        import sys  # local import: only needed for the interactive-session check below

        if not thresholds:
            raise ValueError("thresholds 不能为空")

        if output_gdb is None:
            output_gdb = arcpy.env.workspace

        # A spawned worker starts with a fresh arcpy environment, so a bare dataset
        # name that relies on this process's workspace would not resolve there.
        input_path = arcpy.Describe(input_fc).catalogPath
        final_fc = os.path.join(output_gdb, output_fc)

        # Total feature count, used only to size the progress bar.
        total_features = int(arcpy.GetCount_management(input_path)[0])

        # Count the undersized parcels per land type.
        default_threshold = min(thresholds.values())
        dlmc_type_counts = {}
        with arcpy.da.SearchCursor(input_path, [land_type_field, dz_field, "SHAPE@AREA"]) as cursor:
            for land_type, dz, area in tqdm(cursor, total=total_features, desc="Analyzing features"):
                if area is not None and area < thresholds.get(dz, default_threshold):
                    dlmc_type_counts[land_type] = dlmc_type_counts.get(land_type, 0) + 1

        if not dlmc_type_counts:
            print("没有需要合并的小图斑。")
            return

        # NOTE(review): land types without any undersized parcel are never dispatched,
        # so their features do not reach the output - confirm this is intended.
        dlmc_types_sorted = sorted(dlmc_type_counts, key=dlmc_type_counts.get, reverse=True)

        # Round-robin the types over the groups; never create an empty group, because
        # a worker with no types would build an empty where clause (= all features).
        group_count = max(1, min(num_processes, len(dlmc_types_sorted)))
        groups = [dlmc_types_sorted[i::group_count] for i in range(group_count)]
        tasks = [
            (input_path, os.path.join(output_gdb, f"{output_fc}_part{i}"), land_type_field, dz_field, thresholds, group, i + 1, group_count)
            for i, group in enumerate(groups)
        ]

        # Without fork, child processes must import the worker from a real module file.
        # A function defined in a notebook/interactive session cannot be imported, and
        # waiting on the results would block forever, so run the groups in-process.
        worker_module = sys.modules.get(process_dlmc_types.__module__)
        can_use_pool = (
            multiprocessing.get_start_method() == "fork"
            or getattr(worker_module, "__file__", None) is not None
        )
        if can_use_pool and len(tasks) > 1:
            # The context manager tears the pool down even when a worker raises.
            with multiprocessing.Pool(processes=len(tasks)) as pool:
                pending = [pool.apply_async(process_dlmc_types, task) for task in tasks]
                # The bar advances as each group finishes, in submission order.
                part_paths = [job.get() for job in tqdm(pending, total=len(pending), desc="Overall progress")]
        else:
            part_paths = [process_dlmc_types(*task) for task in tqdm(tasks, desc="Overall progress")]

        # A worker signals failure by returning None; refuse to build a partial result.
        failed = sum(1 for part in part_paths if not part)
        if failed:
            raise RuntimeError(f"{failed} 个子任务处理失败，未生成最终结果")

        # The first part only supplies the schema here, so every part (including
        # the first) still has to be appended afterwards.
        arcpy.CreateFeatureclass_management(output_gdb, output_fc, "POLYGON", part_paths[0], "DISABLED", "DISABLED", input_path)
        for part in part_paths:
            arcpy.Append_management(part, final_fc, "NO_TEST")

        print(f"操作完成。结果保存在要素类: {os.path.join(output_gdb, output_fc)}")

    except Exception as e:
        print(f"发生错误: {str(e)}")
        print(traceback.format_exc())

def process_dlmc_types(input_fc, output_fc_part, land_type_field, dz_field, thresholds, dlmc_types, process_num, total_processes):
    """Copy the parcels of the given land types and merge the undersized ones.

    Each parcel whose area is below its threshold is unioned into the largest
    boundary-touching parcel of the same land type, then removed. Progress is
    shown on a tqdm bar placed at row ``process_num``.

    Args:
        input_fc: Path of the source polygon feature class.
        output_fc_part: Full path of the part feature class to write.
        land_type_field: Field holding the land type.
        dz_field: Field whose value selects the area threshold.
        thresholds: Mapping of ``dz_field`` value -> area threshold; values
            missing from the mapping use the smallest threshold.
        dlmc_types: Land-type values this call is responsible for.
        process_num: Number of this worker; used in the bar label and position.
        total_processes: Total number of workers; used in the bar label.

    Returns:
        ``output_fc_part`` on success, ``None`` when any step failed (the
        error and traceback are printed).
    """
    # Derived from the part name so several parts can be processed in one process.
    layer_name = f"{os.path.basename(output_fc_part)}_layer"
    layer_created = False
    try:
        output_gdb = os.path.dirname(output_fc_part)
        default_threshold = min(thresholds.values())

        # Build the selection: quote-escaped IN list, NULL handled separately, and
        # "1 = 0" for an empty type list (an empty clause would select everything).
        field_sql = arcpy.AddFieldDelimiters(input_fc, land_type_field)
        quoted = ", ".join("'{}'".format(str(t).replace("'", "''")) for t in dlmc_types if t is not None)
        clauses = []
        if quoted:
            clauses.append(f"{field_sql} IN ({quoted})")
        if any(t is None for t in dlmc_types):
            clauses.append(f"{field_sql} IS NULL")
        where_clause = " OR ".join(clauses) if clauses else "1 = 0"

        # Select_analysis creates the output itself; a leftover part from an earlier
        # run has to go first or the tool fails when overwriting is disabled.
        if arcpy.Exists(output_fc_part):
            arcpy.Delete_management(output_fc_part)
        arcpy.Select_analysis(input_fc, output_fc_part, where_clause)

        # Make sure the area field exists (field names compare case-insensitively).
        if "area" not in {f.name.lower() for f in arcpy.ListFields(output_fc_part)}:
            arcpy.AddField_management(output_fc_part, "Area", "DOUBLE")
        arcpy.CalculateField_management(output_fc_part, "Area", "!shape.area!", "PYTHON3")

        oid_field = arcpy.Describe(output_fc_part).OIDFieldName
        arcpy.MakeFeatureLayer_management(output_fc_part, layer_name)
        layer_created = True

        # Collect the candidates up front; geometry is re-read when each is processed.
        small_parcels = []
        with arcpy.da.SearchCursor(output_fc_part, ["OID@", land_type_field, dz_field, "Area"]) as cursor:
            for oid, land_type, dz, area in cursor:
                threshold = thresholds.get(dz, default_threshold)
                if area is not None and area < threshold:
                    small_parcels.append((oid, land_type, threshold))

        with arcpy.da.Editor(output_gdb):
            for oid, land_type, threshold in tqdm(small_parcels, desc=f"Process {process_num}/{total_processes}", position=process_num):
                # Re-read the parcel: an earlier merge may have enlarged it past the threshold.
                shape = None
                with arcpy.da.SearchCursor(output_fc_part, ["SHAPE@", "Area"], f"{oid_field} = {oid}") as current_cursor:
                    for current_shape, current_area in current_cursor:
                        if current_area is not None and current_area < threshold:
                            shape = current_shape
                if shape is None:
                    continue

                # Select the parcels sharing a boundary with this one.
                arcpy.SelectLayerByLocation_management(layer_name, "BOUNDARY_TOUCHES", shape)

                # Largest selected neighbour of the same land type.
                max_area = 0
                max_oid = None
                with arcpy.da.SearchCursor(layer_name, ["OID@", land_type_field, "Area"]) as neighbor_cursor:
                    for n_oid, n_type, n_area in neighbor_cursor:
                        if n_oid != oid and n_type == land_type and n_area is not None and n_area > max_area:
                            max_area, max_oid = n_area, n_oid

                if max_oid is not None:
                    # Grow the neighbour first; drop the small parcel only once that worked.
                    merged = False
                    with arcpy.da.UpdateCursor(output_fc_part, ["SHAPE@", "Area"], f"{oid_field} = {max_oid}") as keep_cursor:
                        for keep_row in keep_cursor:
                            merged_shape = keep_row[0].union(shape)
                            # Keep "Area" current so later comparisons see the merged size.
                            keep_cursor.updateRow([merged_shape, merged_shape.area])
                            merged = True
                    if merged:
                        with arcpy.da.UpdateCursor(output_fc_part, ["OID@"], f"{oid_field} = {oid}") as drop_cursor:
                            for _ in drop_cursor:
                                drop_cursor.deleteRow()

                # Clear the selection before the next parcel.
                arcpy.SelectLayerByAttribute_management(layer_name, "CLEAR_SELECTION")

        return output_fc_part

    except Exception as e:
        print(f"发生错误: {str(e)}")
        print(traceback.format_exc())
        return None

    finally:
        if layer_created:
            try:
                arcpy.Delete_management(layer_name)
            except Exception:
                pass  # best-effort cleanup of the in-memory layer only

In [3]:
import arcpy
import multiprocessing
import os
import traceback
from tqdm import tqdm
import time

def merge_small_parcels(input_fc, output_fc, land_type_field, dz_field, thresholds, output_gdb=None, num_processes=4):
    """Merge undersized parcels of ``input_fc`` into ``output_fc`` with a shared progress bar.

    Land types that contain at least one parcel smaller than its threshold are
    dealt round-robin into groups; each group is handled by
    ``process_dlmc_types``, which reports a 0-100 percentage through a shared
    dict. The resulting parts are appended into a single feature class.

    Args:
        input_fc: Input polygon feature class (name or path).
        output_fc: Name of the feature class to create in ``output_gdb``.
        land_type_field: Field holding the land type used for grouping.
        dz_field: Field whose value selects the area threshold.
        thresholds: Mapping of ``dz_field`` value -> area threshold; values
            missing from the mapping use the smallest threshold.
        output_gdb: Target workspace; defaults to ``arcpy.env.workspace``.
        num_processes: Upper bound on the number of worker processes.

    Returns:
        None. Errors are caught, printed with a traceback, and not re-raised.
    """
    try:
        import sys  # local import: only needed for the interactive-session check below

        if not thresholds:
            raise ValueError("thresholds 不能为空")

        if output_gdb is None:
            output_gdb = arcpy.env.workspace

        # A spawned worker starts with a fresh arcpy environment, so a bare dataset
        # name that relies on this process's workspace would not resolve there.
        input_path = arcpy.Describe(input_fc).catalogPath
        final_fc = os.path.join(output_gdb, output_fc)

        total_features = int(arcpy.GetCount_management(input_path)[0])

        # Count the undersized parcels per land type.
        default_threshold = min(thresholds.values())
        dlmc_type_counts = {}
        with arcpy.da.SearchCursor(input_path, [land_type_field, dz_field, "SHAPE@AREA"]) as cursor:
            for land_type, dz, area in tqdm(cursor, total=total_features, desc="Analyzing features"):
                if area is not None and area < thresholds.get(dz, default_threshold):
                    dlmc_type_counts[land_type] = dlmc_type_counts.get(land_type, 0) + 1

        if not dlmc_type_counts:
            print("没有需要合并的小图斑。")
            return

        # NOTE(review): land types without any undersized parcel are never dispatched,
        # so their features do not reach the output - confirm this is intended.
        dlmc_types_sorted = sorted(dlmc_type_counts, key=dlmc_type_counts.get, reverse=True)

        # Round-robin the types over the groups; never create an empty group, because
        # a worker with no types would build an empty where clause (= all features).
        group_count = max(1, min(num_processes, len(dlmc_types_sorted)))
        groups = [dlmc_types_sorted[i::group_count] for i in range(group_count)]
        base_tasks = [
            (input_path, os.path.join(output_gdb, f"{output_fc}_part{i}"), land_type_field, dz_field, thresholds, group, i)
            for i, group in enumerate(groups)
        ]
        bar_total = 100 * len(base_tasks)

        # Without fork, child processes must import the worker from a real module file.
        # A function defined in a notebook/interactive session cannot be imported, so
        # no result would ever become ready and the polling loop would spin forever.
        worker_module = sys.modules.get(process_dlmc_types.__module__)
        can_use_pool = (
            multiprocessing.get_start_method() == "fork"
            or getattr(worker_module, "__file__", None) is not None
        )
        if can_use_pool and len(base_tasks) > 1:
            # Both context managers shut their helper processes down on any exit path.
            with multiprocessing.Manager() as manager, multiprocessing.Pool(processes=len(base_tasks)) as pool:
                progress_dict = manager.dict()
                for i in range(len(base_tasks)):
                    progress_dict[i] = 0

                pending = [pool.apply_async(process_dlmc_types, task + (progress_dict,)) for task in base_tasks]

                with tqdm(total=bar_total, desc="Overall progress") as pbar:
                    last_total = 0
                    while not all(job.ready() for job in pending):
                        total = sum(progress_dict.values())
                        pbar.update(max(0, total - last_total))
                        last_total = max(last_total, total)
                        time.sleep(1)
                    pbar.update(bar_total - last_total)

                part_paths = [job.get() for job in pending]
        else:
            # In-process fallback: a plain dict stands in for the manager dict.
            progress_dict = {i: 0 for i in range(len(base_tasks))}
            part_paths = []
            with tqdm(total=bar_total, desc="Overall progress") as pbar:
                for task in base_tasks:
                    part_paths.append(process_dlmc_types(*task, progress_dict))
                    pbar.update(100)

        # A worker signals failure by returning None; refuse to build a partial result.
        failed = sum(1 for part in part_paths if not part)
        if failed:
            raise RuntimeError(f"{failed} 个子任务处理失败，未生成最终结果")

        # The first part only supplies the schema here, so every part (including
        # the first) still has to be appended afterwards.
        arcpy.CreateFeatureclass_management(output_gdb, output_fc, "POLYGON", part_paths[0], "DISABLED", "DISABLED", input_path)
        for part in part_paths:
            arcpy.Append_management(part, final_fc, "NO_TEST")

        print(f"操作完成。结果保存在要素类: {os.path.join(output_gdb, output_fc)}")

    except Exception as e:
        print(f"发生错误: {str(e)}")
        print(traceback.format_exc())

def process_dlmc_types(input_fc, output_fc_part, land_type_field, dz_field, thresholds, dlmc_types, process_num, progress_dict):
    """Copy the parcels of the given land types and merge the undersized ones.

    Each parcel whose area is below its threshold is unioned into the largest
    boundary-touching parcel of the same land type, then removed. The share of
    candidate parcels handled so far is published as an integer percentage in
    ``progress_dict[process_num]``.

    Args:
        input_fc: Path of the source polygon feature class.
        output_fc_part: Full path of the part feature class to write.
        land_type_field: Field holding the land type.
        dz_field: Field whose value selects the area threshold.
        thresholds: Mapping of ``dz_field`` value -> area threshold; values
            missing from the mapping use the smallest threshold.
        dlmc_types: Land-type values this call is responsible for.
        process_num: Key under which this worker reports its progress.
        progress_dict: Dict-like object shared with the caller.

    Returns:
        ``output_fc_part`` on success, ``None`` when any step failed (the
        error and traceback are printed).
    """
    # Derived from the part name so several parts can be processed in one process.
    layer_name = f"{os.path.basename(output_fc_part)}_layer"
    layer_created = False
    try:
        output_gdb = os.path.dirname(output_fc_part)
        default_threshold = min(thresholds.values())

        # Build the selection: quote-escaped IN list, NULL handled separately, and
        # "1 = 0" for an empty type list (an empty clause would select everything).
        field_sql = arcpy.AddFieldDelimiters(input_fc, land_type_field)
        quoted = ", ".join("'{}'".format(str(t).replace("'", "''")) for t in dlmc_types if t is not None)
        clauses = []
        if quoted:
            clauses.append(f"{field_sql} IN ({quoted})")
        if any(t is None for t in dlmc_types):
            clauses.append(f"{field_sql} IS NULL")
        where_clause = " OR ".join(clauses) if clauses else "1 = 0"

        # Select_analysis creates the output itself; a leftover part from an earlier
        # run has to go first or the tool fails when overwriting is disabled.
        if arcpy.Exists(output_fc_part):
            arcpy.Delete_management(output_fc_part)
        arcpy.Select_analysis(input_fc, output_fc_part, where_clause)

        # Make sure the area field exists (field names compare case-insensitively).
        if "area" not in {f.name.lower() for f in arcpy.ListFields(output_fc_part)}:
            arcpy.AddField_management(output_fc_part, "Area", "DOUBLE")
        arcpy.CalculateField_management(output_fc_part, "Area", "!shape.area!", "PYTHON3")

        oid_field = arcpy.Describe(output_fc_part).OIDFieldName
        arcpy.MakeFeatureLayer_management(output_fc_part, layer_name)
        layer_created = True

        # Collect the candidates up front; geometry is re-read when each is processed.
        small_parcels = []
        with arcpy.da.SearchCursor(output_fc_part, ["OID@", land_type_field, dz_field, "Area"]) as cursor:
            for oid, land_type, dz, area in cursor:
                threshold = thresholds.get(dz, default_threshold)
                if area is not None and area < threshold:
                    small_parcels.append((oid, land_type, threshold))

        total_parcels = len(small_parcels)
        reported_percent = None

        with arcpy.da.Editor(output_gdb):
            for done, (oid, land_type, threshold) in enumerate(small_parcels):
                # Report parcels processed (not merges), and only when the integer
                # percentage changes, to keep traffic on the shared dict low.
                percent = done * 100 // total_parcels
                if percent != reported_percent:
                    progress_dict[process_num] = percent
                    reported_percent = percent

                # Re-read the parcel: an earlier merge may have enlarged it past the threshold.
                shape = None
                with arcpy.da.SearchCursor(output_fc_part, ["SHAPE@", "Area"], f"{oid_field} = {oid}") as current_cursor:
                    for current_shape, current_area in current_cursor:
                        if current_area is not None and current_area < threshold:
                            shape = current_shape
                if shape is None:
                    continue

                # Select the parcels sharing a boundary with this one.
                arcpy.SelectLayerByLocation_management(layer_name, "BOUNDARY_TOUCHES", shape)

                # Largest selected neighbour of the same land type.
                max_area = 0
                max_oid = None
                with arcpy.da.SearchCursor(layer_name, ["OID@", land_type_field, "Area"]) as neighbor_cursor:
                    for n_oid, n_type, n_area in neighbor_cursor:
                        if n_oid != oid and n_type == land_type and n_area is not None and n_area > max_area:
                            max_area, max_oid = n_area, n_oid

                if max_oid is not None:
                    # Grow the neighbour first; drop the small parcel only once that worked.
                    merged = False
                    with arcpy.da.UpdateCursor(output_fc_part, ["SHAPE@", "Area"], f"{oid_field} = {max_oid}") as keep_cursor:
                        for keep_row in keep_cursor:
                            merged_shape = keep_row[0].union(shape)
                            # Keep "Area" current so later comparisons see the merged size.
                            keep_cursor.updateRow([merged_shape, merged_shape.area])
                            merged = True
                    if merged:
                        with arcpy.da.UpdateCursor(output_fc_part, ["OID@"], f"{oid_field} = {oid}") as drop_cursor:
                            for _ in drop_cursor:
                                drop_cursor.deleteRow()

                # Clear the selection before the next parcel.
                arcpy.SelectLayerByAttribute_management(layer_name, "CLEAR_SELECTION")

        # Also covers the case of zero candidates, where the loop never reports.
        progress_dict[process_num] = 100
        return output_fc_part

    except Exception as e:
        print(f"发生错误: {str(e)}")
        print(traceback.format_exc())
        return None

    finally:
        if layer_created:
            try:
                arcpy.Delete_management(layer_name)
            except Exception:
                pass  # best-effort cleanup of the in-memory layer only

In [4]:
# Example usage
if __name__ == "__main__":
    # Bare dataset names; the input is read by name and the output is created in
    # arcpy.env.workspace because no output_gdb is passed.
    input_fc = "DY_single"
    output_fc = "DY_single_end"
    land_type_field = "DLMC"
    dz_field = "DZ"
    # Maps a DZ field value to the area below which a parcel counts as "small".
    # DZ values not listed here fall back to the smallest threshold in this dict.
    thresholds = {
        "01": 50,
        "03": 1000,
        "04": 1000,
        # Add thresholds for further DZ values here
    }

    merge_small_parcels(input_fc=input_fc, output_fc=output_fc, land_type_field=land_type_field, dz_field=dz_field, thresholds=thresholds)

Analyzing features: 100%|██████████| 284220/284220 [00:02<00:00, 96225.84it/s] 
Overall progress:   0%|          | 0/400 [24:46<?, ?it/s]