In [1]:
import os
import pandas as pd
import numpy as np
import shutil
import arcpy
from arcpy import env
from arcpy.management import *
from arcpy.sa import *
from arcpy.da import *
from arcpy.conversion import *

In [2]:
# 分析函数
# 取消并行处理
def disable_parallel_processing(func):
    """Decorator that runs ``func`` with ArcPy parallel processing switched off.

    The wrapped call executes inside ``arcpy.EnvManager`` with
    ``parallelProcessingFactor="0"``; the environment setting is restored when
    the call returns or raises.

    Args:
        func: Any callable (typically one that runs geoprocessing tools).

    Returns:
        A wrapper with the same signature, name and docstring as ``func`` that
        forwards all arguments and returns ``func``'s result.
    """
    # Local import keeps this cell self-contained without touching the import cell.
    import functools

    # functools.wraps preserves __name__/__doc__/__wrapped__, so help(),
    # tracebacks and introspection still show the decorated function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with arcpy.EnvManager(parallelProcessingFactor="0"):
            return func(*args, **kwargs)
    return wrapper
# 采样
def sample_point(point_,raster_,out_name):
    """Sample a raster at point locations and write the result as a table.

    Args:
        point_: Point dataset giving the sampling locations.
        raster_: Raster to read values from.
        out_name: Path of the output table.

    Returns:
        None. The output is the table written to ``out_name``.
    """
    # Optional arguments, passed positionally after (rasters, locations, out_table).
    # NOTE(review): presumably arcpy.sa.Sample, whose documented order is
    # resampling type, unique ID field, multidimensional handling, acquisition
    # definition, statistics type, percentile value, buffer distance, layout,
    # generate-feature-class flag - confirm against the installed ArcGIS version.
    sample_options = (
        "NEAREST",
        "OBJECTID",
        "CURRENT_SLICE",
        None,
        '',
        None,
        None,
        "ROW_WISE",
        "TABLE",
    )
    Sample(raster_, point_, out_name, *sample_options)

# 导出CSV
def export_csv(table_,out_path,out_name):
    """Export a table through the TableToTable geoprocessing tool.

    Args:
        table_: Input table to export.
        out_path: Destination folder or workspace.
        out_name: Name of the exported table.

    Returns:
        None. The output is the table created at ``out_path``/``out_name``.
    """
    # NOTE(review): the output format is presumably decided by out_path/out_name
    # (e.g. a ".csv" name in a folder) - nothing here forces CSV; confirm.
    TableToTable(table_, out_path, out_name)


In [3]:
# Geodatabase paths
# base_gdb_5m = r"D:\ArcgisData\basedata\basetrain_5m.gdb"
base_gdb_5m = r"D:\ArcGISProjects\workspace\shbyq\features_data_dy.gdb"
# base_gdb_deep = r"D:\ArcgisData\pred_soildeep\pre_database\pred_soildeep_gz.gdb"
# Geodatabase that holds the rasters to sample from
stander_raster_gdb = base_gdb_5m
# Point feature class whose locations are sampled
point_data = r"D:\ArcGISProjects\workspace\shbyq\features_data_vector.gdb\features\dy_point"  # change per run
# File geodatabase that stores the per-raster sample tables
sample_gdb_path = r"D:\ArcGISProjects\workspace\process_table_database\tabledata.gdb" # change per run
# Folder that receives the result CSV
# NOTE(review): "resule_table" looks like a typo for "result_table" - confirm the folder name on disk before changing it
sample_csv = r"D:\ArcGISProjects\workspace\shbyq\resule_table"
# File name of the output CSV
sample_csv_name = "feature_dy_ph.csv" # change per run

In [4]:
# Name of the sample-point dataset. It is later used as the join-key column name;
# presumably it must match the ID column written into each sample table - confirm.
sample_name = "dy_point"

# Every field name present on the point feature class
filed_list = [field.name for field in arcpy.ListFields(point_data)]
print(filed_list)

# Fields to keep from the point data (ID plus target variables)
# elements_yes = ['OBJECTID', '横坐标', '纵坐标', 'N', 'P', 'K2O', '有机质含量', 'pH']
elements_yes = ["OBJECTID", "pH"]

# Keep the feature-class field order; requested fields that do not exist are dropped
filter_list = list(filter(lambda name: name in elements_yes, filed_list))
print(filter_list)

['OBJECTID', 'Shape', 'lon', 'lat', 'B', 'Cu', 'Mn', 'N', 'P', 'Zn', 'K2O', 'som', 'pH']
['OBJECTID', 'pH']


In [5]:
# Raster names to sample, one output column per entry (order defines CSV column order).
# Previous feature set, kept for reference:
# feature_list = ['BIO', 'PRE', 'SRA', 'TMP', 'VAP', 'WIN', 'DEM', 'NDVI', 'TDQS', 'LIGHT', 'LON', 'LAT', 'SLOPE', 'ASP', 'CUR', 'TWI3', 'TWI5', 'TPI3', 'TPI5']
feature_list = [
    # terrain
    "DEM", "AnalyticalHillshading", "Aspect", "ChannelNetworkBaseLevel",
    "ChannelNetworkDistance", "ClosedDepressions", "ConvergenceIndex",
    "LSFactor", "PlanCurvature", "ProfileCurvature", "RelativeSlopePosition",
    "Slope", "TopographicWetnessIndex", "TotalCatchmentArea", "ValleyDepth",
    # texture / imagery
    "Contrast", "Correlation", "Dissimilarity", "Entropy", "Homogeneity",
    "Mean", "ndvi", "PCA_0", "PCA_1", "SecondMoment", "Variance",
    # climate
    "PRE", "SRA", "TMP", "VAP", "WIND", "BIO",
    # location
    "LON", "LAT",
]

In [6]:
# Sanity check: number of rasters that will be sampled (one output column each)
len(feature_list)

34

In [7]:
# Sample every raster in feature_list at the training points, writing one table
# per raster into the sample geodatabase.

# Rasters are passed to sample_point by bare name, so the workspace must be the
# geodatabase that contains them.
env.workspace = stander_raster_gdb

# Start from an empty sample geodatabase. Only delete when it actually exists, and
# let a failed delete (e.g. a lock held by another session) surface instead of
# being swallowed - otherwise stale tables could silently leak into the results.
if arcpy.Exists(sample_gdb_path):
    arcpy.Delete_management(sample_gdb_path)

# Derive folder and name from sample_gdb_path so that changing the path in the
# configuration cell is enough; a hard-coded name would create a different
# geodatabase than the one the loop below writes to.
arcpy.management.CreateFileGDB(
    os.path.dirname(sample_gdb_path),
    os.path.basename(sample_gdb_path),
    "CURRENT",
)

# One Sample run per raster; the output table is named after the raster.
for one_feature in feature_list:
    print(one_feature)
    sample_point(point_data, one_feature, os.path.join(sample_gdb_path, one_feature))

DEM
AnalyticalHillshading
Aspect
ChannelNetworkBaseLevel
ChannelNetworkDistance
ClosedDepressions
ConvergenceIndex
LSFactor
PlanCurvature
ProfileCurvature
RelativeSlopePosition
Slope
TopographicWetnessIndex
TotalCatchmentArea
ValleyDepth
Contrast
Correlation
Dissimilarity
Entropy
Homogeneity
Mean
ndvi
PCA_0
PCA_1
SecondMoment
Variance
PRE
SRA
TMP
VAP
WIND
BIO
LON
LAT


In [8]:
# Sample tables are read by bare name below, so point the workspace at the
# geodatabase that holds them.
env.workspace = sample_gdb_path

# Start from the kept point attributes; OBJECTID is renamed to the join-key
# column name used by the sample tables.
result_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(point_data, filter_list))
result_df = result_df.rename(columns={"OBJECTID": sample_name})
point_count = len(result_df)

# Append one column per raster. The sampled value is taken from the last field
# of each table (tables must not contain nulls for TableToNumPyArray to succeed).
for table in feature_list:
    df = pd.DataFrame(arcpy.da.TableToNumPyArray(table, "*"))
    value_column = df.columns[-1]
    # Rename to the feature name right away instead of renaming by position after
    # all merges: this keeps names correct even if two tables share a value-column
    # name and pandas would otherwise add _x/_y suffixes.
    merged_df = df[[sample_name, value_column]].rename(columns={value_column: table})
    result_df = pd.merge(result_df, merged_df, on=[sample_name])

# The merges are inner joins, so a table with missing or duplicated IDs changes
# the row count without raising; make that visible.
if len(result_df) != point_count:
    print(f"WARNING: row count changed during merge: {point_count} -> {len(result_df)}")

# The join key is not a feature; drop it by name rather than by position.
result_df = result_df.drop(columns=[sample_name])
# result_df.rename(columns={"PH":"PH_T"},inplace=True)

# Make sure the output folder exists so to_csv does not fail on a fresh machine.
os.makedirs(sample_csv, exist_ok=True)
result_df.to_csv(os.path.join(sample_csv, sample_csv_name), index=False)