In [3]:
import os
import arcpy
from arcpy import env
from arcpy.sa import *
from arcpy.da import *
import pandas as pd

In [4]:
def zonstatis(in_table, fields, value_raster,out_table):
    """Summarize a raster within the zones of a vector dataset.

    Thin wrapper around ``ZonalStatisticsAsTable`` that always requests
    ``statistics_type='ALL'`` and passes ``'#'`` for ``ignore_nodata``
    (presumably the tool's default placeholder -- TODO confirm).

    :param in_table: zone dataset (vector zones)
    :param fields: field of ``in_table`` that identifies each zone
    :param value_raster: raster whose values are summarized
    :param out_table: output table that receives the statistics
    """
    ZonalStatisticsAsTable(
        in_zone_data=in_table,
        zone_field=fields,
        in_value_raster=value_raster,
        out_table=out_table,
        ignore_nodata='#',
        statistics_type='ALL',
    )

In [5]:
# Path of the vector (zone polygon) feature class
polygon_path = r"D:\ArcGISProjects\workspace\shbyq\DZ.gdb\filte_sd_end0618"
# Path of the geodatabase holding the feature rasters
raster_path = r'D:\ArcGISProjects\workspace\shbyq\feature_raster_file\features_data_dy.gdb'

In [6]:
# Geodatabase in which the zonal statistics result tables are stored
temp_table_gdb_path = r'F:\cache_data\zone_ana\dy\zone_temp_predict.gdb'

In [7]:
# Remove any existing scratch geodatabase so tables from a previous run
# cannot be picked up by this one.
if arcpy.Exists(temp_table_gdb_path):
    try:
        arcpy.Delete_management(temp_table_gdb_path)
    except arcpy.ExecuteError:
        # Still best effort, but report the geoprocessing error instead of
        # hiding it (and no longer swallow unrelated exceptions).
        print(arcpy.GetMessages(2))
# The create call needs an existing parent folder.
gdb_parent_dir = os.path.dirname(temp_table_gdb_path)
os.makedirs(gdb_parent_dir, exist_ok=True)
# Create a fresh geodatabase; its name is derived from the configured path
# so the two cannot drift apart.
gdb_name = os.path.splitext(os.path.basename(temp_table_gdb_path))[0]
arcpy.management.CreateFileGDB(gdb_parent_dir, gdb_name, "CURRENT")

In [8]:
# Switch the arcpy workspace to the raster geodatabase
env.workspace = raster_path

In [9]:
# List the rasters to process (read from the current workspace)
raster_list = arcpy.ListRasters()

In [10]:
[field.name for field in arcpy.ListFields(polygon_path)]

['OBJECTID',
 'Shape',
 'DLMC',
 '母质',
 'TL',
 'YL',
 'TS',
 'TZ',
 'Shape_Length',
 'Shape_Area']

In [11]:
# Run the zonal statistics for every raster, one output table per raster
for raster_name in raster_list:
    print(raster_name)
    # The output table is named after the raster it summarizes
    stats_table_path = os.path.join(temp_table_gdb_path, raster_name)
    zonstatis(polygon_path, 'OBJECTID', raster_name, stats_table_path)

DEM
AnalyticalHillshading
Aspect
ChannelNetworkBaseLevel
ChannelNetworkDistance
ClosedDepressions
ConvergenceIndex
LSFactor
PlanCurvature
ProfileCurvature
RelativeSlopePosition
Slope
TopographicWetnessIndex
TotalCatchmentArea
ValleyDepth
Contrast
Correlation
Dissimilarity
Entropy
Homogeneity
Mean
ndvi
PCA_0
PCA_1
SecondMoment
Variance
PRE
SRA
TMP
VAP
WIND
BIO
LON
LAT
PH
DZ
DL
MRRTF
MRVBF


### 构造训练表


In [12]:
# Switch the arcpy workspace to the geodatabase holding the statistics tables
env.workspace = temp_table_gdb_path

In [13]:
# List the tables in the current workspace
table_list = arcpy.ListTables()

In [14]:
# Key column used to join the per-raster statistics tables.
# NOTE(review): this is the OBJECTID column of each statistics table; confirm
# it carries the zone (polygon) id rather than the table's own row id.
line_fields = 'OBJECTID'
# Number of trailing columns kept from every table.
# Assumes the statistic fields are always the last 10 columns -- TODO confirm
# this holds for every raster (e.g. integer rasters).
stat_field_count = 10
df_list = []
for one_table in table_list:
    # Load the whole table into a DataFrame (the table must not contain nulls)
    table_df = pd.DataFrame(arcpy.da.TableToNumPyArray(one_table, "*"))
    stat_columns = list(table_df.columns[-stat_field_count:])
    # Copy the selection so the rename below works on an independent frame
    stats_df = table_df[[line_fields] + stat_columns].copy()
    # Prefix every statistic column with the table name to keep names unique
    stats_df.columns = [line_fields] + [one_table + '_' + col for col in stat_columns]
    df_list.append(stats_df)
if not df_list:
    # Fail with a clear message instead of an IndexError on df_list[0]
    raise RuntimeError('No statistics tables found in workspace: ' + str(env.workspace))
# Start from the first table ...
df_merged = df_list[0]
# ... and outer-join the remaining tables on the key column one by one
for df in df_list[1:]:
    df_merged = pd.merge(df_merged, df, on=line_fields, how='outer')

In [15]:
# Display the merged statistics table
df_merged

Unnamed: 0,OBJECTID,DEM_COUNT,DEM_AREA,DEM_MIN,DEM_MAX,DEM_RANGE,DEM_MEAN,DEM_STD,DEM_SUM,DEM_MEDIAN,...,MRVBF_COUNT,MRVBF_AREA,MRVBF_MIN,MRVBF_MAX,MRVBF_RANGE,MRVBF_MEAN,MRVBF_STD,MRVBF_SUM,MRVBF_MEDIAN,MRVBF_PCT90
0,1,22.0,550.0,907.500000,911.400024,3.900024,908.972731,1.111586,19997.400085,908.850006,...,22.0,550.0,0.016284,2.990280,2.973997,1.919678,1.146824,42.232911,2.369035,2.988014
1,2,627.0,15675.0,875.200012,884.000000,8.799988,878.200797,1.398296,550631.900024,878.400024,...,627.0,15675.0,0.599543,4.981056,4.381513,4.002467,0.875012,2509.546746,3.978155,4.916583
2,3,467.0,11675.0,866.299988,869.500000,3.200012,868.042613,0.683362,405375.900146,868.099976,...,467.0,11675.0,4.930423,5.951635,1.021212,5.348103,0.400779,2497.564015,5.245837,5.875710
3,4,54.0,1350.0,865.000000,866.200012,1.200012,865.562968,0.303861,46740.400269,865.549988,...,54.0,1350.0,4.939991,4.974739,0.034748,4.957116,0.009110,267.684281,4.957727,4.969123
4,5,32.0,800.0,871.700012,872.900024,1.200012,872.290627,0.350318,27913.300049,872.250000,...,32.0,800.0,4.980256,4.984515,0.004259,4.982545,0.001083,159.441453,4.982555,4.984013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3511,3512,60.0,1500.0,859.799988,866.500000,6.700012,863.835001,1.952928,51830.100037,864.299988,...,60.0,1500.0,0.054332,4.945550,4.891218,2.511352,2.004947,150.681142,1.677247,4.920014
3512,3513,45.0,1125.0,865.000000,866.700012,1.700012,866.046667,0.391919,38972.100037,866.099976,...,45.0,1125.0,4.982695,4.989220,0.006526,4.988032,0.001375,224.461449,4.988360,4.989126
3513,3514,55.0,1375.0,865.000000,865.299988,0.299988,865.198174,0.099984,47585.899597,865.200012,...,55.0,1375.0,4.979093,4.987638,0.008546,4.985137,0.002225,274.182509,4.985968,4.987318
3514,3515,47.0,1175.0,860.400024,861.599976,1.199951,860.844677,0.261590,40459.699829,860.799988,...,47.0,1175.0,4.946017,4.989026,0.043009,4.984661,0.007999,234.279087,4.987025,4.988748


In [16]:
# NOTE(review): self-assignment, the value of polygon_path is unchanged here
polygon_path = polygon_path

In [17]:
# Add a field to a table
def add_field(input_table,field_name,field_type='DOUBLE'):
    """Add a new field to a table or feature class.

    :param input_table: table that receives the field
    :param field_name: name of the field to add
    :param field_type: field type, ``'DOUBLE'`` unless specified
    """
    arcpy.management.AddField(input_table, field_name, field_type)

In [18]:

# Extra per-polygon attributes: bounding-box corners and centroid coordinates
new_coord_fields = ('XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y')
for coord_field in new_coord_fields:
    # Each field is created with add_field's default type
    add_field(polygon_path, coord_field)


In [19]:
# Fill the bounding-box and centroid fields from each feature's geometry
coord_fields = ["XMin", "YMin", "XMax", "YMax", "Centroid_X", "Centroid_Y"]
with arcpy.da.UpdateCursor(polygon_path, ["SHAPE@"] + coord_fields) as cursor:
    for row in cursor:
        geometry = row[0]
        if geometry is None:
            # A feature without geometry has no extent or centroid; leave its
            # fields untouched instead of failing on the attribute access.
            continue
        extent = geometry.extent
        centroid = geometry.centroid
        # Same column order as the cursor field list above
        cursor.updateRow([
            geometry,
            extent.XMin,
            extent.YMin,
            extent.XMax,
            extent.YMax,
            centroid.X,
            centroid.Y,
        ])

print("四至坐标计算完成并存储在新字段中。")


四至坐标计算完成并存储在新字段中。


In [20]:
polygon_fields_list = [field.name for field in arcpy.ListFields(polygon_path)]

In [21]:
# Show the field names of the polygon feature class after the new fields were added
print(polygon_fields_list)

['OBJECTID', 'Shape', 'DLMC', '母质', 'TL', 'YL', 'TS', 'TZ', 'Shape_Length', 'Shape_Area', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']


In [22]:
# Attribute fields exported from the polygon feature class (geometry fields are left out)
polygon_export_fields = [
    'OBJECTID', 'DLMC', '母质', 'TL', 'YL', 'TS', 'TZ',
    'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y',
]
polygon_array = arcpy.da.FeatureClassToNumPyArray(polygon_path, polygon_export_fields)
polygon_df = pd.DataFrame(polygon_array)

In [23]:
# Left-join the zonal statistics onto the polygon attributes by OBJECTID,
# so every polygon row is kept
result_df = polygon_df.merge(df_merged, how='left', on='OBJECTID')

In [25]:
# Save the training data
train_data_path = r'F:\cache_data\zone_ana\dy\train_data'
# to_csv does not create missing folders, so make sure the target exists
os.makedirs(train_data_path, exist_ok=True)
result_df.to_csv(os.path.join(train_data_path, 'train_20240618.csv'), index=False)