In [1]:
import os
import arcpy
from arcpy import env
from arcpy.sa import *
from arcpy.da import *
import pandas as pd

In [2]:
def zonstatis(in_table, fields, value_raster,out_table):
    """统计分区矢量内的栅格信息
    :param in_table: 矢量分区
    :param fields: 矢量字段
    :param value_raster: 栅格数据
    :param out_table: 输出文件"""
    # ZonalStatisticsAsTable(in_zone_data=in_table,zone_field=fields,in_value_raster=value_raster,out_table=out_table,ignore_nodata='#',statistics_type='ALL')
    ZonalStatisticsAsTable(in_zone_data=in_table,zone_field=fields,in_value_raster=value_raster,out_table=out_table,ignore_nodata='DATA',statistics_type='ALL')

In [4]:
# 矢量数据路径
polygon_path = r"D:\ArcGISProjects\workspace\shbyq\DZ.gdb\DY_SD_MZ_SLOPEPOSITION_INTERSECT_SINGLE_ELIMITE_2"
# 栅格数据路径
raster_path = r'D:\ArcGISProjects\workspace\shbyq\feature_raster_file\features_data_dy.gdb'

In [5]:
# 统计结果表格存放路径
temp_table_gdb_path = r'F:\cache_data\zone_ana\dy\zone_temp_predict.gdb'

In [6]:
# 使用Delete_management函数删除数据库中的所有内容
try:
    arcpy.Delete_management(temp_table_gdb_path)
except:
    pass
# 再创建一个新的数据库
arcpy.management.CreateFileGDB(os.path.dirname(temp_table_gdb_path), "zone_temp_predict", "CURRENT")

In [7]:
# 切换工作空间
env.workspace = raster_path

In [8]:
# 获取需要处理的栅格列表
raster_list = arcpy.ListRasters()

In [9]:
[_.name for _ in arcpy.ListFields(polygon_path)]

['OBJECTID',
 'Shape',
 '母质',
 'DLBM',
 'DLMC',
 'DZ',
 'gridcode',
 'Area',
 'Shape_Length',
 'Shape_Area']

In [10]:
raster_list

['DEM',
 'AnalyticalHillshading',
 'Aspect',
 'ChannelNetworkBaseLevel',
 'ChannelNetworkDistance',
 'ClosedDepressions',
 'ConvergenceIndex',
 'LSFactor',
 'MRRTF',
 'MRVBF',
 'PlanCurvature',
 'ProfileCurvature',
 'RelativeSlopePosition',
 'Slope',
 'TopographicWetnessIndex',
 'TotalCatchmentArea',
 'ValleyDepth',
 'NIGHT2022',
 'ETP2022_mean',
 'TMP2022_mean',
 'PRE2022_mean',
 'PRE2022_3',
 'PRE2022_11',
 'ETP2022_3',
 'ETP2022_11',
 'TMP2022_3',
 'TMP2022_11',
 'evi',
 'lswi',
 'mndwi',
 'ndmi',
 'ndvi',
 'ndwi',
 'PCA_0',
 'PCA_1',
 'savi',
 'vari',
 'DL',
 'LON',
 'LAT',
 'SlopeClass',
 'DZ']

In [11]:
raster_list = ['DEM','AnalyticalHillshading','Aspect',
 'ChannelNetworkBaseLevel',
 'ChannelNetworkDistance',
 'ConvergenceIndex',
 'LSFactor',
 'MRRTF',
 'MRVBF',
 'PlanCurvature',
 'ProfileCurvature',
 'RelativeSlopePosition',
 'Slope',
 'TopographicWetnessIndex',
 'TotalCatchmentArea',
 'ValleyDepth',
 'NIGHT2022',
 'ETP2022_mean',
 'TMP2022_mean',
 'PRE2022_mean',
 'PRE2022_3',
 'PRE2022_11',
 'ETP2022_3',
 'ETP2022_11',
 'TMP2022_3',
 'TMP2022_11',
 'evi',
 'lswi',
 'mndwi',
 'ndmi',
 'ndvi',
 'ndwi',
 'PCA_0',
 'PCA_1',
 'savi',
 'vari',
 'DL',
 'SlopeClass',
 'DZ']
# 逐一统计分析
for one_raster in raster_list:
    print(one_raster)
    # 分区统计
    zonstatis(polygon_path,'OBJECTID',one_raster,os.path.join(temp_table_gdb_path,one_raster))

DEM
AnalyticalHillshading
Aspect
ChannelNetworkBaseLevel
ChannelNetworkDistance
ConvergenceIndex
LSFactor
MRRTF
MRVBF
PlanCurvature
ProfileCurvature
RelativeSlopePosition
Slope
TopographicWetnessIndex
TotalCatchmentArea
ValleyDepth
NIGHT2022
ETP2022_mean
TMP2022_mean
PRE2022_mean
PRE2022_3
PRE2022_11
ETP2022_3
ETP2022_11
TMP2022_3
TMP2022_11
evi
lswi
mndwi
ndmi
ndvi
ndwi
PCA_0
PCA_1
savi
vari
DL
SlopeClass
DZ


### 构造训练表


In [12]:
# 切换工作空间
env.workspace = temp_table_gdb_path

In [13]:
# 获取表格列表
table_list = arcpy.ListTables()

In [14]:
# 指定需要的信息
line_fields = 'OBJECTID'
df_list = []
for one_table in table_list:
    # 将表转换为pandas数据帧
    df = pd.DataFrame(arcpy.da.TableToNumPyArray(one_table, "*"))  # 确保数据表中无空值
    # 提取统计字段的数据
    merged_df = df[[line_fields]+ list(df.columns[-10:])]
    # 在统计数据列名中添加表名
    merged_df.columns = [line_fields]+[one_table + '_' + col for col in merged_df.columns[-10:]]
    # 添加到列表
    df_list.append(merged_df)
# 初始合并数据帧
df_merged = df_list[0]
# 逐个合并其余的数据帧
for df in df_list[1:]:
    df_merged = pd.merge(df_merged, df, on=line_fields, how='outer')

In [15]:
df_merged

Unnamed: 0,OBJECTID,DEM_COUNT,DEM_AREA,DEM_MIN,DEM_MAX,DEM_RANGE,DEM_MEAN,DEM_STD,DEM_SUM,DEM_MEDIAN,...,DZ_MAX,DZ_RANGE,DZ_MEAN,DZ_STD,DZ_SUM,DZ_VARIETY,DZ_MAJORITY,DZ_MINORITY,DZ_MEDIAN,DZ_PCT90
0,1,18.0,450.0,921.500000,925.200012,3.700012,923.488888,1.128365,16622.799988,923.600006,...,8,0,8.0,0.0,144.0,1,8,8,8,8
1,2,2.0,50.0,925.700012,927.000000,1.299988,926.350006,0.649994,1852.700012,926.350006,...,8,0,8.0,0.0,16.0,1,8,8,8,8
2,3,16.0,400.0,927.500000,929.200012,1.700012,928.256256,0.554499,14852.100098,928.100006,...,8,0,8.0,0.0,128.0,1,8,8,8,8
3,4,145.0,3625.0,936.400024,966.900024,30.500000,954.242757,7.767321,138365.199768,955.799988,...,8,0,8.0,0.0,1160.0,1,8,8,8,8
4,5,9.0,225.0,966.000000,969.099976,3.099976,967.566664,1.051979,8708.099976,967.799988,...,8,0,8.0,0.0,72.0,1,8,8,8,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216626,216627,253.0,6325.0,881.299988,934.699951,53.399963,909.910275,12.982616,230207.299561,910.000000,...,1,0,1.0,0.0,253.0,1,1,1,1,1
216627,216628,757.0,18925.0,835.400024,903.500000,68.099976,869.152574,15.643079,657948.498779,868.700012,...,1,0,1.0,0.0,757.0,1,1,1,1,1
216628,216629,42.0,1050.0,883.900024,908.099976,24.199951,900.471425,6.060856,37819.799866,902.049988,...,1,0,1.0,0.0,42.0,1,1,1,1,1
216629,216630,122.0,3050.0,849.200012,900.899963,51.699951,872.011473,13.682877,106385.399719,870.899994,...,1,0,1.0,0.0,122.0,1,1,1,1,1


In [16]:
polygon_path = polygon_path

In [17]:
# 添加字段
def add_field(input_table,field_name,field_type='DOUBLE'):
    """参数说明：
        input_table: 输入数据表
        field_name: 字段名
        field_type: 字段类型"""
    arcpy.AddField_management(input_table,field_name,field_type)

In [18]:

# 新增面特征信息
# 添加新字段来存储四至坐标
for one_fields in ['XMin', 'YMin', 'XMax', 'YMax','Centroid_X','Centroid_Y']:
    add_field(polygon_path,one_fields)


In [19]:
# 更新字段值
with arcpy.da.UpdateCursor(polygon_path, ["SHAPE@", "XMin", "YMin", "XMax", "YMax", "Centroid_X", "Centroid_Y"]) as cursor:
    for row in cursor:
        centroid = row[0].centroid
        extent = row[0].extent
        row[1] = extent.XMin
        row[2] = extent.YMin
        row[3] = extent.XMax
        row[4] = extent.YMax
        row[5] = centroid.X
        row[6] = centroid.Y
        cursor.updateRow(row)

print("四至坐标计算完成并存储在新字段中。")


四至坐标计算完成并存储在新字段中。


In [20]:
polygon_fields_list = [_.name for _ in arcpy.ListFields(polygon_path)]

In [21]:
print(polygon_fields_list)

['OBJECTID', 'Shape', '母质', 'DLBM', 'DLMC', 'DZ', 'gridcode', 'Area', 'Shape_Length', 'Shape_Area', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']


In [22]:
polygon_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(polygon_path, ['OBJECTID', 'DLMC', '母质',  'gridcode','XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']))

In [23]:
polygon_df

Unnamed: 0,OBJECTID,DLMC,母质,gridcode,XMin,YMin,XMax,YMax,Centroid_X,Centroid_Y
0,1,水田,第四纪冰川冲积物,1,454739.6637,2.882454e+06,454781.3714,2.882474e+06,454756.942612,2.882465e+06
1,2,水田,第四纪冰川冲积物,1,454881.8917,2.882450e+06,454898.7767,2.882460e+06,454892.921053,2.882455e+06
2,3,水田,第四纪冰川冲积物,1,454954.1197,2.882387e+06,454976.8166,2.882435e+06,454964.636696,2.882409e+06
3,4,旱地,第四纪冰川冲积物,1,455005.0673,2.882167e+06,455065.4423,2.882306e+06,455037.026989,2.882233e+06
4,5,旱地,第四纪冰川冲积物,2,454999.4751,2.882201e+06,455016.8091,2.882242e+06,455007.392802,2.882228e+06
...,...,...,...,...,...,...,...,...,...,...
216736,216745,乔木林地,泥(页)岩,2,452412.5206,2.891939e+06,452564.0716,2.892049e+06,452468.057822,2.891997e+06
216737,216746,乔木林地,泥(页)岩,2,452264.0354,2.892170e+06,452541.4760,2.892295e+06,452423.800567,2.892228e+06
216738,216747,乔木林地,泥(页)岩,2,452590.0488,2.892170e+06,452675.8087,2.892206e+06,452654.368700,2.892189e+06
216739,216748,乔木林地,泥(页)岩,2,452276.5958,2.892019e+06,452380.1389,2.892118e+06,452354.253100,2.892072e+06


In [24]:
# 连接表格
result_df = pd.merge(polygon_df, df_merged, on='OBJECTID',how='left')

In [28]:
missing_ids = set(polygon_df['OBJECTID']) - set(df_merged['OBJECTID'])
print(f"Missing OBJECTID: {missing_ids}")
print(len(missing_ids))

Missing OBJECTID: {216632, 216633, 216634, 216635, 216636, 216637, 216638, 216639, 216640, 216641, 216642, 216643, 216644, 216645, 216646, 216647, 216648, 216649, 216650, 216651, 216652, 216653, 216654, 216655, 216656, 216657, 216658, 216659, 216660, 216661, 216662, 216663, 216664, 216665, 216666, 216667, 216668, 216669, 216670, 216671, 216672, 216673, 216674, 216675, 216676, 216677, 216678, 216679, 216680, 216681, 216682, 216683, 216684, 216685, 216686, 216687, 216688, 216689, 216690, 216691, 216692, 216693, 216694, 216695, 216696, 216697, 216698, 216699, 216700, 216701, 216702, 216703, 216704, 216705, 216706, 216707, 216708, 216709, 216710, 216711, 216712, 216713, 216714, 216715, 216716, 216717, 216718, 216719, 216720, 216721, 216722, 216723, 216724, 216725, 216726, 216727, 216728, 216729, 216730, 216731, 216732, 216733, 216734, 216735, 216736, 216737, 216738, 216739, 216740, 216741, 216742, 216743, 216744, 216745, 216746, 216747, 216748, 216749}
118


In [26]:
print(len(polygon_df['OBJECTID']),len(df_merged['OBJECTID']))

216741 216631


In [29]:
# 选择数值类型的列
import numpy as np
numeric_cols = result_df.select_dtypes(include=[np.float64])

# 计算数值列的均值
mean_values = numeric_cols.mean()

# 用均值填充数值列的空值
result_df[numeric_cols.columns] = numeric_cols.fillna(mean_values)

In [30]:
# 保存训练数据
train_data_path = r'F:\cache_data\zone_ana\dy\prediction_data'
result_df.to_csv(os.path.join(train_data_path, 'result_20240728.csv'), index=False)