In [1]:
import os
import arcpy
from arcpy import env
from arcpy.sa import *
from arcpy.da import *
import pandas as pd

In [2]:
def zonstatis(in_table, fields, value_raster,out_table):
    """统计分区矢量内的栅格信息
    :param in_table: 矢量分区
    :param fields: 矢量字段
    :param value_raster: 栅格数据
    :param out_table: 输出文件"""
    ZonalStatisticsAsTable(in_zone_data=in_table,zone_field=fields,in_value_raster=value_raster,out_table=out_table,ignore_nodata='#',statistics_type='ALL')

In [3]:
# 矢量数据路径
polygon_path = r"D:\ArcGISProjects\workspace\shbyq\DZ.gdb\select_train_calc_end"
# 栅格数据路径
raster_path = r'D:\ArcGISProjects\workspace\shbyq\feature_raster_file\features_data_dy.gdb'

In [4]:
# 统计结果表格存放路径
temp_table_gdb_path = r'F:\cache_data\zone_ana\dy\zone_temp.gdb'

In [5]:
# 使用Delete_management函数删除数据库中的所有内容
try:
    arcpy.Delete_management(temp_table_gdb_path)
except:
    pass
# 再创建一个新的数据库
arcpy.management.CreateFileGDB(os.path.dirname(temp_table_gdb_path), "zone_temp", "CURRENT")

In [5]:
# 切换工作空间
env.workspace = raster_path

In [6]:
# 获取需要处理的栅格列表
raster_list = arcpy.ListRasters()

In [7]:
[_.name for _ in arcpy.ListFields(polygon_path)]

['OBJECTID',
 'Shape',
 '母质',
 'DLMC',
 'TL',
 'YL',
 'TS',
 'TZ',
 'Shape_Length',
 'Shape_Area']

In [9]:
# 逐一统计分析
for one_raster in raster_list:
    print(one_raster)
    # 分区统计
    zonstatis(polygon_path,'OBJECTID',one_raster,os.path.join(temp_table_gdb_path,one_raster))

DEM
AnalyticalHillshading
Aspect
ChannelNetworkBaseLevel
ChannelNetworkDistance
ClosedDepressions
ConvergenceIndex
LSFactor
MRRTF
MRVBF
PlanCurvature
ProfileCurvature
RelativeSlopePosition
Slope
TopographicWetnessIndex
TotalCatchmentArea
ValleyDepth
NIGHT2022
ETP2022_mean
TMP2022_mean
PRE2022_mean
PRE2022_3
PRE2022_11
ETP2022_3
ETP2022_11
TMP2022_3
TMP2022_11
evi
lswi
mndwi
ndmi
ndvi
ndwi
PCA_0
PCA_1
savi
vari
DL
LON
LAT
SlopeClass
DZ


### 构造训练表


In [8]:
# 切换工作空间
env.workspace = temp_table_gdb_path

In [9]:
# 获取表格列表
table_list = arcpy.ListTables()

In [11]:
# 指定需要的信息
line_fields = 'OBJECTID'
df_list = []
for one_table in table_list:
    # 将表转换为pandas数据帧
    df = pd.DataFrame(arcpy.da.TableToNumPyArray(one_table, "*"))  # 确保数据表中无空值
    # 提取统计字段的数据
    merged_df = df[[line_fields]+ list(df.columns[-10:])]
    # 在统计数据列名中添加表名
    merged_df.columns = [line_fields]+[one_table + '_' + col for col in merged_df.columns[-10:]]
    # 添加到列表
    df_list.append(merged_df)
# 初始合并数据帧
df_merged = df_list[0]
# 逐个合并其余的数据帧
for df in df_list[1:]:
    df_merged = pd.merge(df_merged, df, on=line_fields, how='outer')

In [12]:
df_merged

Unnamed: 0,OBJECTID,DEM_COUNT,DEM_AREA,DEM_MIN,DEM_MAX,DEM_RANGE,DEM_MEAN,DEM_STD,DEM_SUM,DEM_MEDIAN,...,DZ_MAX,DZ_RANGE,DZ_MEAN,DZ_STD,DZ_SUM,DZ_VARIETY,DZ_MAJORITY,DZ_MINORITY,DZ_MEDIAN,DZ_PCT90
0,1,145.0,3625.0,936.400024,966.900024,30.500000,954.242757,7.767321,138365.199768,955.799988,...,8.0,0.0,8.0,0.0,1160.0,1.0,8.0,8.0,8.0,8.0
1,2,9.0,225.0,966.000000,969.099976,3.099976,967.566664,1.051979,8708.099976,967.799988,...,8.0,0.0,8.0,0.0,72.0,1.0,8.0,8.0,8.0,8.0
2,3,33.0,825.0,961.400024,975.599976,14.199951,965.633332,4.070707,31865.899963,964.299988,...,8.0,0.0,8.0,0.0,264.0,1.0,8.0,8.0,8.0,8.0
3,4,49.0,1225.0,947.900024,965.099976,17.199951,955.314290,4.485306,46810.400208,955.400024,...,8.0,0.0,8.0,0.0,392.0,1.0,8.0,8.0,8.0,8.0
4,5,77.0,1925.0,1016.400024,1033.800049,17.400024,1025.184418,3.953336,78939.200195,1025.699951,...,8.0,0.0,8.0,0.0,616.0,1.0,8.0,8.0,8.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18707,18708,242.0,6050.0,813.799988,834.799988,21.000000,822.162396,4.267215,198963.299744,822.250000,...,6.0,0.0,6.0,0.0,630.0,1.0,6.0,6.0,6.0,6.0
18708,18709,105.0,2625.0,804.799988,816.099976,11.299988,810.112384,2.641414,85061.800354,809.700012,...,6.0,0.0,6.0,0.0,660.0,1.0,6.0,6.0,6.0,6.0
18709,18710,110.0,2750.0,797.900024,809.599976,11.699951,803.350002,2.708027,88368.500183,803.400024,...,6.0,0.0,6.0,0.0,1434.0,1.0,6.0,6.0,6.0,6.0
18710,18711,239.0,5975.0,805.000000,820.299988,15.299988,811.485774,3.657591,193945.100098,810.400024,...,6.0,0.0,6.0,0.0,1008.0,1.0,6.0,6.0,6.0,6.0


In [13]:
polygon_path = polygon_path

In [14]:
# 添加字段
def add_field(input_table,field_name,field_type='DOUBLE'):
    """参数说明：
        input_table: 输入数据表
        field_name: 字段名
        field_type: 字段类型"""
    arcpy.AddField_management(input_table,field_name,field_type)

In [15]:

# 新增面特征信息
# 添加新字段来存储四至坐标
for one_fields in ['XMin', 'YMin', 'XMax', 'YMax','Centroid_X','Centroid_Y']:
    add_field(polygon_path,one_fields)


In [16]:
# 更新字段值
with arcpy.da.UpdateCursor(polygon_path, ["SHAPE@", "XMin", "YMin", "XMax", "YMax", "Centroid_X", "Centroid_Y"]) as cursor:
    for row in cursor:
        centroid = row[0].centroid
        extent = row[0].extent
        row[1] = extent.XMin
        row[2] = extent.YMin
        row[3] = extent.XMax
        row[4] = extent.YMax
        row[5] = centroid.X
        row[6] = centroid.Y
        cursor.updateRow(row)

print("四至坐标计算完成并存储在新字段中。")


四至坐标计算完成并存储在新字段中。


In [17]:
polygon_fields_list = [_.name for _ in arcpy.ListFields(polygon_path)]

In [18]:
print(polygon_fields_list)

['OBJECTID', 'Shape', '母质', 'DLMC', 'TL', 'YL', 'TS', 'TZ', 'Shape_Length', 'Shape_Area', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']


In [23]:
polygon_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(polygon_path, ['OBJECTID', 'DLMC', '母质', 'TL', 'YL', 'TS', 'TZ', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']))

In [24]:
polygon_df

Unnamed: 0,OBJECTID,DLMC,母质,TL,YL,TS,TZ,XMin,YMin,XMax,YMax,Centroid_X,Centroid_Y
0,2,旱地,第四纪冰川冲积物,黄壤,典型黄壤,泥砂质黄壤,中层泥砂质黄壤,455005.0673,2.882167e+06,455065.4423,2.882306e+06,455037.026989,2.882233e+06
1,3,旱地,第四纪冰川冲积物,黄壤,典型黄壤,泥砂质黄壤,中层泥砂质黄壤,454999.4751,2.882201e+06,455016.8091,2.882242e+06,455007.392802,2.882228e+06
2,4,旱地,第四纪冰川冲积物,黄壤,典型黄壤,泥砂质黄壤,中层泥砂质黄壤,455166.5979,2.882134e+06,455232.0584,2.882195e+06,455199.328100,2.882192e+06
3,5,旱地,第四纪冰川冲积物,黄壤,典型黄壤,泥砂质黄壤,中层泥砂质黄壤,454999.5027,2.882222e+06,455036.8687,2.882290e+06,455020.789501,2.882255e+06
4,6,旱地,第四纪冰川冲积物,黄壤,典型黄壤,泥砂质黄壤,中层泥砂质黄壤,454621.5787,2.882275e+06,454675.3769,2.882336e+06,454648.758455,2.882305e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18731,20931,水田,砾岩,水稻土,潴育型水稻土,白粉泥田,黄白粉泥田,439451.2660,2.865012e+06,439562.8506,2.865119e+06,439509.957170,2.865072e+06
18732,20934,水田,砾岩,水稻土,潴育型水稻土,白粉泥田,黄白粉泥田,439469.2361,2.864877e+06,439569.5532,2.864932e+06,439520.375053,2.864909e+06
18733,20935,水田,砾岩,水稻土,潴育型水稻土,白粉泥田,黄白粉泥田,439569.5532,2.864830e+06,439639.9568,2.864926e+06,439608.190059,2.864878e+06
18734,20937,水田,砾岩,水稻土,潴育型水稻土,白粉泥田,黄白粉泥田,439515.6290,2.864902e+06,439600.0773,2.865029e+06,439564.002940,2.864972e+06


In [25]:
# 连接表格
result_df = pd.merge(polygon_df, df_merged, on='OBJECTID',how='left')

In [26]:
# 保存训练数据
train_data_path = r'F:\cache_data\zone_ana\dy\train_data'
result_df.to_csv(os.path.join(train_data_path, 'train_20240726.csv'), index=False)