In [1]:
import os
import arcpy
from arcpy import env
from arcpy.sa import *
from arcpy.da import *
import pandas as pd

In [2]:
def zonstatis(in_table, fields, value_raster,out_table):
    """统计分区矢量内的栅格信息
    :param in_table: 矢量分区
    :param fields: 矢量字段
    :param value_raster: 栅格数据
    :param out_table: 输出文件"""
    ZonalStatisticsAsTable(in_zone_data=in_table,zone_field=fields,in_value_raster=value_raster,out_table=out_table,ignore_nodata='#',statistics_type='ALL')

In [3]:
# 矢量数据路径
polygon_path = r"D:\ArcGISProjects\workspace\shbyq\DZ.gdb\pre_sd_data_ele_result"
# 栅格数据路径
raster_path = r'D:\ArcGISProjects\workspace\shbyq\feature_raster_file\features_data_dy.gdb'

In [4]:
# 统计结果表格存放路径
temp_table_gdb_path = r'F:\cache_data\zone_ana\dy\zone_temp_predict.gdb'

In [5]:
# 使用Delete_management函数删除数据库中的所有内容
try:
    arcpy.Delete_management(temp_table_gdb_path)
except:
    pass
# 再创建一个新的数据库
arcpy.management.CreateFileGDB(os.path.dirname(temp_table_gdb_path), "zone_temp_predict", "CURRENT")

In [6]:
# 切换工作空间
env.workspace = raster_path

In [7]:
# 获取需要处理的栅格列表
raster_list = arcpy.ListRasters()

In [8]:
[_.name for _ in arcpy.ListFields(polygon_path)]

['OBJECTID',
 'Shape',
 'DLBM',
 'DLMC',
 '母质',
 'XMin',
 'YMin',
 'XMax',
 'YMax',
 'Centroid_X',
 'Centroid_Y',
 'Shape_Length',
 'Shape_Area']

In [9]:
raster_list = ['DEM','ndvi', 'PCA_0', 'Slope','Aspect', 'MRVBF','TopographicWetnessIndex', 'Mean', 'PH','PRE','SRA','TMP','VAP','WIND','PlanCurvature']
# 逐一统计分析
for one_raster in raster_list:
    print(one_raster)
    # 分区统计
    zonstatis(polygon_path,'OBJECTID',one_raster,os.path.join(temp_table_gdb_path,one_raster))

DEM
ndvi
PCA_0
Slope
Aspect
MRVBF
TopographicWetnessIndex
Mean
PH
PRE
SRA
TMP
VAP
WIND
PlanCurvature


### 构造训练表


In [10]:
# 切换工作空间
env.workspace = temp_table_gdb_path

In [11]:
# 获取表格列表
table_list = arcpy.ListTables()

In [13]:
# 指定需要的信息
line_fields = 'OBJECTID'
df_list = []
for one_table in table_list:
    # 将表转换为pandas数据帧
    df = pd.DataFrame(arcpy.da.TableToNumPyArray(one_table, "*"))  # 确保数据表中无空值
    # 提取统计字段的数据
    merged_df = df[[line_fields]+ list(df.columns[-10:])]
    # 在统计数据列名中添加表名
    merged_df.columns = [line_fields]+[one_table + '_' + col for col in merged_df.columns[-10:]]
    # 添加到列表
    df_list.append(merged_df)
# 初始合并数据帧
df_merged = df_list[0]
# 逐个合并其余的数据帧
for df in df_list[1:]:
    df_merged = pd.merge(df_merged, df, on=line_fields, how='outer')

In [14]:
df_merged

Unnamed: 0,OBJECTID,DEM_COUNT,DEM_AREA,DEM_MIN,DEM_MAX,DEM_RANGE,DEM_MEAN,DEM_STD,DEM_SUM,DEM_MEDIAN,...,PlanCurvature_COUNT,PlanCurvature_AREA,PlanCurvature_MIN,PlanCurvature_MAX,PlanCurvature_RANGE,PlanCurvature_MEAN,PlanCurvature_STD,PlanCurvature_SUM,PlanCurvature_MEDIAN,PlanCurvature_PCT90
0,1,73.0,1825.0,810.599976,831.500000,20.900024,820.541093,6.291782,5.989950e+04,819.900024,...,73.0,1825.0,-0.067372,0.010579,0.077950,-0.021811,0.018731,-1.592169,-0.018643,-0.000479
1,2,128.0,3200.0,818.900024,833.500000,14.599976,825.126560,2.758637,1.056162e+05,825.099976,...,128.0,3200.0,-0.026316,0.027722,0.054038,-0.002581,0.010063,-0.330411,-0.002647,0.011235
2,3,16.0,400.0,814.200012,821.299988,7.099976,817.437500,1.950917,1.307900e+04,817.500000,...,16.0,400.0,-0.036333,0.013883,0.050216,-0.020094,0.012817,-0.321511,-0.023000,-0.004573
3,4,18.0,450.0,960.299988,963.900024,3.600037,962.055552,1.146118,1.731700e+04,962.299988,...,18.0,450.0,-0.007533,0.018027,0.025560,0.005295,0.007007,0.095302,0.006621,0.013291
4,5,17.0,425.0,901.500000,905.700012,4.200012,904.005874,1.029835,1.536810e+04,904.000000,...,17.0,425.0,-0.044917,0.008462,0.053379,-0.007835,0.012118,-0.133202,-0.003221,0.001870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147252,147253,12.0,300.0,847.000000,861.400024,14.400024,853.966680,4.147150,1.024760e+04,854.150024,...,12.0,300.0,-0.005093,0.005961,0.011054,-0.000043,0.003661,-0.000518,0.000272,0.005187
147253,147254,3037.0,75925.0,827.699951,966.200012,138.500061,905.201578,32.108270,2.749097e+06,906.700012,...,3037.0,75925.0,-0.047357,0.050889,0.098247,0.002187,0.015498,6.642844,0.003301,0.021785
147254,147255,12.0,300.0,847.000000,861.400024,14.400024,853.966680,4.147150,1.024760e+04,854.150024,...,12.0,300.0,-0.005093,0.005961,0.011054,-0.000043,0.003661,-0.000518,0.000272,0.005187
147255,147256,19.0,475.0,957.799988,966.700012,8.900024,962.821048,2.469733,1.829360e+04,963.400024,...,19.0,475.0,-0.009158,0.011809,0.020966,0.001205,0.005776,0.022897,0.002621,0.006683


In [15]:
polygon_path = polygon_path

In [None]:
# 添加字段
def add_field(input_table,field_name,field_type='DOUBLE'):
    """参数说明：
        input_table: 输入数据表
        field_name: 字段名
        field_type: 字段类型"""
    arcpy.AddField_management(input_table,field_name,field_type)

In [None]:

# 新增面特征信息
# 添加新字段来存储四至坐标
for one_fields in ['XMin', 'YMin', 'XMax', 'YMax','Centroid_X','Centroid_Y']:
    add_field(polygon_path,one_fields)


In [None]:
# 更新字段值
with arcpy.da.UpdateCursor(polygon_path, ["SHAPE@", "XMin", "YMin", "XMax", "YMax", "Centroid_X", "Centroid_Y"]) as cursor:
    for row in cursor:
        centroid = row[0].centroid
        extent = row[0].extent
        row[1] = extent.XMin
        row[2] = extent.YMin
        row[3] = extent.XMax
        row[4] = extent.YMax
        row[5] = centroid.X
        row[6] = centroid.Y
        cursor.updateRow(row)

print("四至坐标计算完成并存储在新字段中。")


In [16]:
polygon_fields_list = [_.name for _ in arcpy.ListFields(polygon_path)]

In [17]:
print(polygon_fields_list)

['OBJECTID', 'Shape', 'DLBM', 'DLMC', '母质', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y', 'Shape_Length', 'Shape_Area']


In [18]:
polygon_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(polygon_path, ['OBJECTID', 'DLMC', '母质',  'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']))

In [19]:
polygon_df

Unnamed: 0,OBJECTID,DLMC,母质,XMin,YMin,XMax,YMax,Centroid_X,Centroid_Y
0,1,水田,碳酸岩,453415.7591,2.909007e+06,453504.8993,2.909054e+06,453460.380336,2.909038e+06
1,2,水田,碳酸岩,453745.4063,2.909096e+06,453833.4091,2.909244e+06,453776.184840,2.909171e+06
2,3,水田,碳酸岩,453397.2270,2.909322e+06,453426.3763,2.909352e+06,453414.506759,2.909337e+06
3,4,水田,碳酸岩,453172.7335,2.903237e+06,453194.5528,2.903281e+06,453180.794689,2.903264e+06
4,5,水田,碳酸岩,453040.1877,2.903039e+06,453105.8702,2.903078e+06,453076.497652,2.903056e+06
...,...,...,...,...,...,...,...,...,...
147271,147272,乔木林地,泥(页)岩,452245.3975,2.892085e+06,452264.3931,2.892108e+06,452254.780808,2.892097e+06
147272,147273,乔木林地,泥(页)岩,452234.2057,2.891939e+06,452675.8087,2.892295e+06,452445.473955,2.892138e+06
147273,147274,乔木林地,泥(页)岩,452245.3975,2.892085e+06,452264.3931,2.892108e+06,452254.780808,2.892097e+06
147274,147275,乔木林地,砂岩,441506.1327,2.876965e+06,441558.2704,2.876986e+06,441532.863852,2.876972e+06


In [22]:
# 连接表格
result_df = pd.merge(polygon_df, df_merged, on='OBJECTID',how='left')

In [25]:
# 选择数值类型的列
import numpy as np
numeric_cols = result_df.select_dtypes(include=[np.float64])

# 计算数值列的均值
mean_values = numeric_cols.mean()

# 用均值填充数值列的空值
result_df[numeric_cols.columns] = numeric_cols.fillna(mean_values)

In [26]:
# 保存训练数据
train_data_path = r'F:\cache_data\zone_ana\dy\prediction_data'
result_df.to_csv(os.path.join(train_data_path, 'result_20240619.csv'), index=False)