# 生成土壤类型虚拟点

In [1]:
# type:ignore
import os
import pandas as pd
import arcpy
from arcpy import env
from arcpy.management import *
from arcpy.sa import *
from arcpy.da import *
from arcpy.conversion import *
from arcpy.analysis import *
import numpy as np

In [2]:
# type:ignore
# tool function
# 创建渔网
def create_net(output_file,extent_file,cell_size):
    """参数说明：
        output_file: 输出文件
        extent_file: 输入范围文件
        cell_size: 生成的cell大小
    """
    proj = arcpy.Describe(extent_file).spatialReference
    extent_x_min = arcpy.Describe(extent_file).extent.XMin
    extent_y_min = arcpy.Describe(extent_file).extent.YMin
    extent_x_max = arcpy.Describe(extent_file).extent.XMax
    extent_y_max = arcpy.Describe(extent_file).extent.YMax
    with arcpy.EnvManager(outputCoordinateSystem = proj):
        arcpy.management.CreateFishnet(out_feature_class=output_file,origin_coord=f"{extent_x_min} {extent_y_min}",y_axis_coord=f"{extent_x_min} {extent_y_min+10}",cell_width=cell_size,cell_height=cell_size,number_rows=None,number_columns=None,corner_coord=f"{extent_x_max} {extent_y_max}",labels="NO_LABELS",template="#",geometry_type="POLYGON")
# 面转点
def polygon_point(in_feature, out_feature):
    """参数说明：
        in_feature: 输入面
        out_feature: 输出点
    """
    proj = arcpy.Describe(in_feature).spatialReference
    with arcpy.EnvManager(outputCoordinateSystem=proj):
        arcpy.management.FeatureToPoint(in_features=in_feature,out_feature_class=out_feature,point_location="INSIDE")

# 筛选点

def select_point(input_polygon,input_point,output_point):
    """参数说明：
        input_polygon: 选择面
        input_point: 待选择点
        output_point: 选择点
    """
    polygon_layer = "polygonLayer"
    point_layer = "pointLayer"
    arcpy.MakeFeatureLayer_management(input_polygon, polygon_layer)
    arcpy.MakeFeatureLayer_management(input_point, point_layer)
    arcpy.SelectLayerByLocation_management(in_layer=point_layer, overlap_type="INTERSECT", select_features=polygon_layer,search_distance=0,selection_type='NEW_SELECTION',invert_spatial_relationship="NOT_INVERT")
    # 导出选择的要素
    arcpy.CopyFeatures_management(point_layer, output_point)

# 添加字段
def add_field(input_table,field_name,field_type='TEXT'):
    """参数说明：
        input_table: 输入数据表
        field_name: 字段名
        field_type: 字段类型"""
    arcpy.AddField_management(input_table,field_name,field_type)
# 删除要素
def delete_feature(input_feature):
    arcpy.Delete_management(input_feature)
# 空间连接
def perform_spatial_join(target_layer_path, join_layer_path, output_layer_path, field_mapping_dict):
    """参数说明：
        target_layer_path: 目标图层路径 (点图层)
        join_layer_path: 连接图层路径 (包含属性的面图层)
        output_layer_path: 输出图层路径
        field_mapping_dict: 字段映射字典 {目标字段:源字段}
    """
    # 创建空的 FieldMappings 对象
    field_mappings = arcpy.FieldMappings()

    # 只添加需要的字段映射
    for target_field, source_field in field_mapping_dict.items():
        # 创建字段映射对象
        field_map = arcpy.FieldMap()
        
        # 添加源字段（从连接图层）
        field_map.addInputField(join_layer_path, source_field)
        
        # 设置输出字段属性
        output_field = field_map.outputField
        output_field.name = target_field
        output_field.aliasName = target_field
        field_map.outputField = output_field
        
        # 添加到field_mappings
        field_mappings.addFieldMap(field_map)

    # 执行空间连接操作
    arcpy.analysis.SpatialJoin(
        target_layer_path, 
        join_layer_path, 
        output_layer_path,
        "JOIN_ONE_TO_ONE", 
        "KEEP_ALL", 
        field_mappings,
        match_option="INTERSECT"
    )

def perform_multiple_spatial_joins(target_layer_path, join_layers_info, final_output):
    """
    对一个目标图层执行多个空间连接操作,并清理中间文件
    
    参数说明：
        target_layer_path: 目标图层路径 (点图层)
        join_layers_info: 连接图层信息列表，每个元素为字典，包含：
            {
                'join_layer': 连接图层路径,
                'field_mapping': 字段映射字典 {目标字段:源字段}
            }
        final_output: 最终输出图层路径
    """
    current_input = target_layer_path
    temp_outputs = []  # 存储中间输出文件路径
    
    # 获取工作空间路径
    workspace = os.path.dirname(target_layer_path)
    
    for idx, join_info in enumerate(join_layers_info):
        # 获取当前连接操作的参数
        join_layer = join_info['join_layer']
        # 生成临时输出文件完整路径
        temp_output = os.path.join(workspace, f"temp_join_{idx}.shp")
        temp_outputs.append(temp_output)
        
        # 最后一次连接时使用最终输出路径
        output_layer = final_output if idx == len(join_layers_info) - 1 else temp_output
        field_mapping_dict = join_info['field_mapping']
        
        # 创建空的 FieldMappings 对象
        field_mappings = arcpy.FieldMappings()
        
        # 保留目标图层的所有字段
        field_mappings.addTable(current_input)
        field_mappings.addTable(join_layer)
        
        # 获取需要保留的字段列表
        keep_fields = [f.name for f in arcpy.ListFields(current_input)]  # 保留目标图层的所有字段
        keep_fields.extend(list(field_mapping_dict.keys()))  # 添加新的映射字段
        
        # 移除所有不需要的字段
        field_indices_to_remove = []
        for i in range(field_mappings.fieldCount):
            field = field_mappings.getFieldMap(i)
            if field.getInputFieldName(0) not in keep_fields and \
               field.outputField.name not in keep_fields:
                field_indices_to_remove.append(i)
        
        # 从后向前移除字段，避免索引变化
        for i in reversed(field_indices_to_remove):
            field_mappings.removeFieldMap(i)
        
        # 添加或更新需要的连接字段映射
        for target_field, source_field in field_mapping_dict.items():
            # 创建字段映射对象
            field_map = arcpy.FieldMap()
            
            # 添加源字段（从连接图层）
            field_map.addInputField(join_layer, source_field)
            
            # 设置输出字段属性
            output_field = field_map.outputField
            output_field.name = target_field
            output_field.aliasName = target_field
            field_map.outputField = output_field
            
            # 找到并替换现有的字段映射（如果存在）
            field_index = field_mappings.findFieldMapIndex(target_field)
            if field_index != -1:
                field_mappings.replaceFieldMap(field_index, field_map)
            else:
                field_mappings.addFieldMap(field_map)
        
        # 执行空间连接操作
        arcpy.analysis.SpatialJoin(
            current_input, 
            join_layer, 
            output_layer,
            "JOIN_ONE_TO_ONE", 
            "KEEP_ALL", 
            field_mappings,
            match_option="INTERSECT"
        )
        
        # 更新当前输入图层为最新的输出图层
        current_input = output_layer  # 这是关键修改

    # 清理中间文件
    for temp_file in temp_outputs[:-1]:  # 不删除最后一个输出文件
        if arcpy.Exists(temp_file):
            arcpy.Delete_management(temp_file)
# 点采样
def sample_point(point_,raster_,out_name):
    """根据栅格采样点,输出为表格"""
    Sample(raster_,point_,out_name,"NEAREST", "OBJECTID", "CURRENT_SLICE", None, '', None, None, "ROW_WISE", "TABLE")
    return None

# 导出CSV
def export_csv(table_,out_path,out_name):
    """参数说明：
        table_: 输入数据表
        out_path: 输出路径
        out_name: 输出表名"""
    TableToTable(table_,out_path,out_name)
    return None

# 要素转点
def feature_to_point(input_layer,output_layer):
    """参数说明："""
    proj = arcpy.Describe(input_layer).spatialReference
    with arcpy.EnvManager(outputCoordinateSystem=proj):
        arcpy.management.FeatureToPoint(input_layer, output_layer, "INSIDE")

# 删除多余字段
def delete_extra_fields(input_feature_class, fields_list):
    """参数说明："""
    fields = arcpy.ListFields(input_feature_class)
    for field in fields:
        name = field.name
        type = field.type.upper()
        # 如果字段名不在删选列表中且类型不为OID和Geometry便删除这个字段
        if name not in fields_list and type not in ['OID','GEOMETRY']:
            arcpy.DeleteField_management(input_feature_class, [name])

In [3]:
# 设置工作空间
env.workspace = r'F:\cache_data\shp_file\qz'
env.overwriteOutput=True

In [4]:
def create_point(input_layer):
    """生成虚拟点"""
    # 定义文件名称
    fish_net_name = "qz_grid"
    net_to_point_name = "qz_inside_point"
    select_point_name = "select_point"
    result_point_name = "join_point"
    # 渔网构建
    create_net(fish_net_name,input_layer,300)
    # 要素转点
    polygon_point(fish_net_name,net_to_point_name)
    # 按位置选择
    select_point(input_layer,net_to_point_name,select_point_name)
    # 添加字段
    field_list = ['TL','YL','TS','TZ']
    for one_field in field_list:
        # 添加字段
        add_field(input_table=select_point_name,field_name=one_field,field_type='TEXT')
    # 空间连接赋予地类属性
    # 定义目标要素图层和连接要素图层的路径  {目标字段:源字段}
    fields_mapping = {
        "TL": "清镇市",
        "YL": "清镇_1",
        "TS": "清镇_12",
        "TZ": "清镇__13"
    }
    # 空间连接
    perform_spatial_join(select_point_name,input_layer,result_point_name,fields_mapping)
    # 删除多余数据
    for one_feature in [fish_net_name,net_to_point_name,select_point_name]:
        delete_feature(one_feature)

In [5]:
# 生成渔网点
create_point(input_layer="qz_ep_polygon")

In [6]:
# 生成面内部点
feature_to_point(input_layer="qz_ep_polygon",output_layer="qz_tl_dh_point")

In [8]:
# 内部点字段整理
input_feature = r"F:\cache_data\shp_file\qz\qz_tl_dh_point.shp"
# 定义字段映射关系：新字段名:原字段名
field_mapping = {
    'TL': '清镇市',
    'YL': '清镇_1',
    'TS': '清镇_12',
    'TZ': '清镇__13'
}

# 添加字段并复制值
for new_field, source_field in field_mapping.items():
    # 添加新字段
    add_field(input_table=input_feature, field_name=new_field, field_type='TEXT')
    # 从原有字段复制值到新字段
    arcpy.CalculateField_management(input_feature, new_field, f"!{source_field}!", "PYTHON3")

# 只保留新添加的字段
delete_extra_fields(input_feature, list(field_mapping.keys()))

In [9]:
# 渔网点字段整理
# 添加字段
# input_feature = r"F:\cache_data\shp_file\ky\join_point.shp"
input_feature = r"F:\cache_data\shp_file\qz\qz_tl_dh_point.shp"
field_list = ['TL','YL','TS','TZ']
# 删除多余字段
delete_extra_fields(input_feature, field_list)

In [10]:
# 为两个点位数据增加label
feature_1 = r"F:\cache_data\shp_file\qz\join_point.shp"
feature_2 = r"F:\cache_data\shp_file\qz\qz_tl_dh_point.shp"
for one_feature in [[feature_1,'fish_net'],[feature_2,'inner']]:
    features = one_feature[0]
    labels = one_feature[1]
    field_name = "label"
    # 添加字段
    add_field(input_table=features,field_name="label",field_type='TEXT')
    # 注意：这里使用了Python表达式
    arcpy.CalculateField_management(features, field_name, f"'{labels}'", "PYTHON3")


In [11]:
# 合并点位数据
feature_1 = "join_point"
feature_2 = "qz_tl_dh_point"
output_feature = "filter_result_point"
# 合并
arcpy.management.Merge([feature_1,feature_2],output_feature)
# 删除过渡文件
for one_feature in [feature_1,feature_2]:
    delete_feature(one_feature)

In [12]:
# 添加经纬度字段并计算经纬度

# 输入和输出文件
input_feature_class = "filter_result_point"  # 请替换为您的点文件的名称

# 添加字段
arcpy.AddField_management(input_feature_class, "LON", "DOUBLE")
arcpy.AddField_management(input_feature_class, "LAT", "DOUBLE")

# 使用CalculateField_management计算经纬度
# 假设您的点文件有一个名为"SHAPE"的字段，它包含了点的几何信息
# arcpy.CalculateField_management(input_feature_class, "LON", "!SHAPE.firstPoint.X!", "PYTHON3")
# arcpy.CalculateField_management(input_feature_class, "LAT", "!SHAPE.firstPoint.Y!", "PYTHON3")
arcpy.management.CalculateGeometryAttributes(input_feature_class, [['LON','POINT_X'], ['LAT','POINT_Y']],coordinate_format='DD')
# print("字段添加和计算完成！")

# 名称完善

In [8]:
excel_path = r"D:\worker\工作\work\三普\数据\开阳\开阳县土种明细表20240905.xlsx"

In [9]:
df_excel = pd.read_excel(excel_path)
# 向前填充
df_excel = df_excel.ffill()

In [None]:
# 查看所有土种
table_list = list(set(df_excel['开阳土种'].unique()))
table_list

In [None]:
shp_list = list(set(pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(r'F:\cache_data\shp_file\ky\filter_result_point.shp', ['TZ'])).TZ.unique()))
shp_list

In [None]:
# 比较两个列表
# 表中有的，shp中没有的
set(table_list) - set(shp_list)

In [None]:
# shp中有的，表中没有的
set(shp_list) - set(table_list)

In [14]:
# 使用groupby按照`土种编号`聚合，并应用自定义函数
# 定义聚合函数
def aggregate_rows(group):
    records = group[['三普土类', '三普亚类', '三普土属', '三普土种']].to_dict('records')
    return records[0] if records else {}

# 使用groupby按照`开阳土种`聚合
result_dict = df_excel.groupby('开阳土种').apply(aggregate_rows).to_dict()


In [None]:
result_dict,len(result_dict),len(df_excel['开阳土种'].unique())

In [None]:
# 去除字典中值和键的空格，并将中文括号替换为英文括号
result_dict = {str(k).strip(): {str(key).strip().replace('（', '(').replace('）', ')'): str(value).strip().replace('（', '(').replace('）', ')') for key, value in v.items()} for k, v in result_dict.items() if v}
result_dict, len(result_dict)

In [13]:
file_path2 = r'F:\cache_data\shp_file\qz\filter_result_point.shp'

In [14]:
for one_name in arcpy.ListFields(file_path2):
    print(one_name.name)

FID
Shape
Join_Count
TARGET_FID
TL
YL
TS
TZ
label
LON
LAT


In [27]:
code_block =f"""
def get_soil_class(soil_code):
    
    soil_dict = {result_dict}
    if soil_code in soil_dict:
        return soil_dict[soil_code]['三普土种']
    else:
        return "NONE"
"""
field_name = "NEW_TZ"
expression = "get_soil_class(!TZ!)"

In [None]:
arcpy.CalculateField_management(in_table=file_path2, field=field_name, expression=expression, expression_type="PYTHON3", code_block=code_block)

In [5]:
# 使用空间连接获取点位数据的地类名称、地质类别、坡位
point_layer = r"F:\cache_data\shp_file\qz\filter_result_point.shp"
dl_layer = r"F:\cache_data\shp_file\qz\qz_sd_polygon.shp"
dz_layer = r"C:\Users\Runker\Desktop\rock_type\predict_raster\rock_type_predict_smooth_smooth.shp"
pw_layer = r"F:\cache_data\shp_file\qz\qz_slopepostion_polygon.shp"

In [6]:
# 添加字段
add_field(input_table=point_layer,field_name="DL",field_type="TEXT")
add_field(input_table=point_layer,field_name="DLLB",field_type="TEXT")
add_field(input_table=point_layer,field_name="MZMC",field_type="TEXT")
add_field(input_table=point_layer,field_name="PW",field_type="TEXT")

In [7]:
# 空间连接
output_layer = r"F:\cache_data\shp_file\qz\filter_result_point_join.shp"
perform_multiple_spatial_joins(point_layer,[
    {'join_layer':dl_layer,'output_layer':'filter_result_point_dl','field_mapping':{'DL':'DLMC'}},
    {'join_layer':dl_layer,'output_layer':'filter_result_point_dllb','field_mapping':{'DLLB':'DLLB'}},
    {'join_layer':dz_layer,'output_layer':'filter_result_point_dz','field_mapping':{'MZMC':'rocks'}},
    {'join_layer':pw_layer,'output_layer':'filter_result_point_pw','field_mapping':{'PW':'gridcode'}}
],final_output=output_layer)


In [9]:
# 查看字段
print([one_field.name for one_field in arcpy.ListFields(output_layer)])

['FID', 'Shape', 'Join_Count', 'TARGET_FID', 'Join_Cou_1', 'TARGET_F_1', 'Join_Cou_2', 'TARGET_F_2', 'Join_Cou_3', 'TARGET_F_3', 'Join_Cou_4', 'TARGET_F_4', 'TL', 'YL', 'TS', 'TZ', 'label', 'LON', 'LAT', 'DL', 'DLLB', 'MZMC', 'PW']


In [10]:
# 清除多余字段
join_point_field_list = ['TL', 'YL', 'TS', 'TZ', 'label', 'LON', 'LAT', 'DL', 'DLLB', 'MZMC', 'PW']
# 删除多余字段
delete_extra_fields(output_layer, join_point_field_list)

In [14]:
# 删除DLLB值不为[水稻土、自然土、非自然土]的点
try:
    # 创建图层
    arcpy.MakeFeatureLayer_management(output_layer, "point_layer")

    # 选择符合条件的要素
    arcpy.SelectLayerByAttribute_management("point_layer", "NEW_SELECTION", "DLLB NOT in ('水稻土','自然土','非自然土')")

    # 删除选择的要素
    arcpy.DeleteFeatures_management("point_layer")

finally:
    # 删除临时图层
    arcpy.Delete_management("point_layer")


# 土壤分类系统字典查看

In [3]:
# 虚拟点路径
soil_point_path = r'F:\cache_data\shp_file\qz\filter_result_point_join.shp'

In [4]:
# 查看df
field_list = ['TL','YL','TS','TZ','DL','DLLB','MZMC','PW']
soil_type_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(soil_point_path, field_list))

In [5]:
# 去重
fileter_soil_type_df = soil_type_df.drop_duplicates()
# 删除包含' '的行
fileter_soil_type_df = fileter_soil_type_df[fileter_soil_type_df['TL'] != ' ']

In [6]:
fileter_soil_type_df

Unnamed: 0,TL,YL,TS,TZ,DL,DLLB,MZMC,PW
0,黄壤,黄泥土,黄泥土,黄泥土,水田,水稻土,河流冲积物,2.0
1,黄壤,黄泥土,黄泥土,黄泥土,林地,自然土,白云岩,2.0
2,黄壤,黄壤,硅铁质黄壤,硅铁质黄壤,种植园用地,非自然土,河流冲积物,3.0
4,水稻土,大眼泥田,大眼泥田,大眼泥田,水田,水稻土,河流冲积物,3.0
5,黄壤,黄泥土,黄泥土,黄泥土,林地,自然土,白云岩,5.0
...,...,...,...,...,...,...,...,...
14296,水稻土,黄泥田,黄沙泥田,黄沙泥田,旱地,非自然土,泥(页)岩,1.0
14366,水稻土,潮泥田,潮泥田,潮沙泥田,林地,自然土,泥岩,4.0
14380,石灰土,淋溶黄色石灰土,淋溶黄色石灰土,淋溶黄色石灰土,林地,自然土,砂岩,4.0
14381,石灰土,黄色石灰土,黄色石灰土,黄色石灰土,水田,水稻土,灰岩,5.0


In [16]:
fileter_soil_type_df.to_excel(r"C:\Users\Runker\Desktop\type_dict.xlsx",index=False)

In [13]:
# 根据字典返回计算值
result_soil_type_df = pd.read_excel(r"C:\Users\Runker\Desktop\type_dict.xlsx")

In [14]:
# 构造字典
# 创建一个查询字典，使用元组作为键来存储多个列的值
lookup_dict = {}
# 假设我们要使用的列是 TL, YL, TS, TZ, DL, DLLB, MZMC, PW
columns_to_match = ['TL', 'YL', 'TS', 'TZ', 'DL', 'DLLB', 'MZMC', 'PW']

# 构建查询字典
for idx, row in result_soil_type_df.iterrows():
    # 创建一个元组作为键，包含所有需要匹配的列的值
    key = tuple(row[columns_to_match])
    # 将 NEW_TZ 的值作为字典的值
    lookup_dict[key] = row['NEW_TZ']


In [17]:
def update_shp_with_new_tz(input_shp, lookup_dict, columns_to_match):
    """
    在shp文件中添加NEW_TZ字段并根据lookup_dict更新值
    """
    # 添加NEW_TZ字段
    arcpy.AddField_management(input_shp, "NEW_TZ", "TEXT", field_length=50)
    
    # 构造代码块
    code_block = f"""
def get_soil_type({', '.join(columns_to_match)}):
    # 将PW转换为整数
    try:
        PW = int(float(PW)) if PW is not None else None
    except:
        PW = None
        
    # 构建键元组
    key = (
        str(TL),
        str(YL),
        str(TS),
        str(TZ),
        str(DL),
        str(DLLB),
        str(MZMC),
        PW
    )
    # 查询字典
    lookup_dict = {lookup_dict}
    return lookup_dict.get(key, '未匹配')
"""
    
    # 构造表达式
    field_refs = [f"!{col}!" for col in columns_to_match]
    expression = f"get_soil_type({', '.join(field_refs)})"
    
    # 执行字段计算
    arcpy.CalculateField_management(
        in_table=input_shp,
        field="NEW_TZ",
        expression=expression,
        expression_type="PYTHON3",
        code_block=code_block
    )

In [18]:
# 假设你的shp文件路径为
input_shp = r"F:\cache_data\shp_file\qz\filter_result_point_join.shp"

# 需要匹配的字段列表
columns_to_match = ['TL', 'YL', 'TS', 'TZ', 'DL', 'DLLB', 'MZMC', 'PW']

# 调用函数
update_shp_with_new_tz(input_shp, lookup_dict, columns_to_match)

# 5MDEM数据拷贝

In [65]:
import os
import shutil

In [105]:
in_dir = r'G:\data\第三次全国土壤普查\贵州省地理国情普查框架数据更新DEM（5米格网_2000坐标系）\G48'
# in_dir = r'G:\data\第三次全国土壤普查\贵州省地理国情普查框架数据更新DEM（5米格网_2000坐标系）\H48'
# in_dir = r'G:\data\第三次全国土壤普查\贵州省地理国情普查框架数据更新DEM（5米格网_2000坐标系）\H49'
out_dir = r'F:\ArcgisData\m5\qz'

In [106]:
# 复制指定范围的文件夹到指定路径
mid_min = 25   # 23
min_max = 39   # 06

tail_min = 65  # 66  001
tail_max = 75  # 47  008

def get_file_name(file_name):
    mid = file_name[4:7]
    tail = file_name[7:]
    # 判断mid 和tail是否可以转换位数字
    if mid.isdigit() and tail.isdigit():
        if int(mid) >= mid_min and int(mid) <= min_max and int(tail) >= tail_min and int(tail) <= tail_max:
            return True
    else:
        print(file_name)
        return False

In [None]:
for one_dir in os.listdir(in_dir):
    if get_file_name(one_dir):
        if not os.path.exists(os.path.join(out_dir,one_dir)):
            shutil.copytree(os.path.join(in_dir,one_dir),os.path.join(out_dir,one_dir))
    else:
        continue