In [12]:
import os

import arcpy
import numpy as np
import pandas as pd
from arcpy import env
from arcpy.sa import *
from arcpy.da import *

In [13]:
def zonstatis(in_table, fields, value_raster, out_table):
    """Summarize a value raster inside each zone and write the result as a table.

    Calls ``ZonalStatisticsAsTable`` with ``statistics_type='ALL'`` and
    ``ignore_nodata='DATA'``. Nothing is returned; the only product is the
    table written to ``out_table``.

    :param in_table: zone dataset (the polygon layer that defines the zones)
    :param fields: name of the field in ``in_table`` that identifies each zone
    :param value_raster: raster whose cell values are summarized per zone
    :param out_table: path of the output statistics table
    """
    zonal_options = dict(
        in_zone_data=in_table,
        zone_field=fields,
        in_value_raster=value_raster,
        out_table=out_table,
        ignore_nodata='DATA',
        statistics_type='ALL',
    )
    ZonalStatisticsAsTable(**zonal_options)

In [14]:
# Path to the vector (polygon) feature class used as the zone data
polygon_path = r"D:\ArcGISProjects\workspace\sp2024\MyProject.gdb\sb_merge_data__MultipartToSi1_rsult"
# Folder holding the raster data; it becomes the arcpy workspace further down
raster_path = r'F:\tif_features\county_feature\sb'

In [15]:
# File geodatabase in which the zonal-statistics result tables are stored
temp_table_gdb_path = r'F:\cache_data\zone_ana\sb\zone_temp_predict.gdb'

In [17]:
# Start from an empty geodatabase so tables left over from earlier runs cannot
# leak into the merge step later in the notebook.
if arcpy.Exists(temp_table_gdb_path):
    try:
        arcpy.management.Delete(temp_table_gdb_path)
    except arcpy.ExecuteError:
        # Still best effort, but report why the delete failed (for example a
        # schema lock) instead of hiding it behind a bare except.
        print(arcpy.GetMessages(2))

# Derive folder and name from the configured path so the two cannot drift apart.
gdb_folder, gdb_file = os.path.split(temp_table_gdb_path)
# CreateFileGDB needs an existing parent folder.
os.makedirs(gdb_folder, exist_ok=True)
# Create the new, empty geodatabase.
arcpy.management.CreateFileGDB(gdb_folder, os.path.splitext(gdb_file)[0], "CURRENT")

In [18]:
# Switch the arcpy workspace to the raster folder
env.workspace = raster_path
# Let geoprocessing tools overwrite outputs that already exist
env.overwriteOutput = True

In [19]:
# Get the list of rasters to process from the current workspace
raster_list = arcpy.ListRasters()

In [20]:
# Show the field names of the zone polygons
[field.name for field in arcpy.ListFields(polygon_path)]

['OBJECTID',
 'Shape',
 'DLBM',
 'DLMC',
 'DLDM',
 'MZMC',
 'Shape_Length',
 'Shape_Area']

In [21]:
# Explicit list of the rasters to process. Rebinding raster_list here replaces
# the value obtained from arcpy.ListRasters() in the earlier cell.
raster_list = ['aligned_Analytical Hillshading.tif',
 'aligned_Aspect.tif',
 'aligned_Channel Network Base Level.tif',
 'aligned_Channel Network Distance.tif',
 'aligned_Convergence Index.tif',
 'aligned_dem.tif',
 'aligned_ETP2022_3.tif',
 'aligned_ETP2022_8.tif',
 'aligned_ETP2022_mean.tif',
 'aligned_evi.tif',
 'aligned_LS-Factor.tif',
 'aligned_lswi.tif',
 'aligned_mndwi.tif',
 'aligned_ndmi.tif',
 'aligned_ndvi.tif',
 'aligned_ndwi.tif',
 'aligned_NIGHT2022.tif',
 'aligned_pca_1.tif',
 'aligned_pca_2.tif',
 'aligned_Plan Curvature.tif',
 'aligned_PRE2022_3.tif',
 'aligned_PRE2022_8.tif',
 'aligned_PRE2022_mean.tif',
 'aligned_Profile Curvature.tif',
 'aligned_Relative Slope Position.tif',
 'aligned_savi.tif',
 'aligned_Slope.tif',
 'aligned_TMP2022_3.tif',
 'aligned_TMP2022_8.tif',
 'aligned_TMP2022_mean.tif',
 'aligned_Topographic Wetness Index.tif',
 'aligned_Total Catchment Area.tif',
 'aligned_Valley Depth.tif',
 'aligned_vari.tif',
 'MRRTF.tif',
 'MRVBF.tif',
 'slope_postion_101_smooth.tif']

In [22]:
# Run the zonal statistics raster by raster, one output table per raster
for one_raster in raster_list:
    # Table name: the file name up to its first dot, with spaces and hyphens
    # replaced by underscores
    raster_name = one_raster.split('.')[0].replace(' ', '_').replace('-', '_')

    # Progress output: table name first, then the source raster
    print(raster_name, one_raster, sep='\n')
    # Zonal statistics keyed on OBJECTID, written into the result geodatabase
    zonstatis(
        polygon_path,
        'OBJECTID',
        one_raster,
        os.path.join(temp_table_gdb_path, raster_name),
    )

aligned_Analytical_Hillshading
aligned_Analytical Hillshading.tif
aligned_Aspect
aligned_Aspect.tif
aligned_Channel_Network_Base_Level
aligned_Channel Network Base Level.tif
aligned_Channel_Network_Distance
aligned_Channel Network Distance.tif
aligned_Convergence_Index
aligned_Convergence Index.tif
aligned_dem
aligned_dem.tif
aligned_ETP2022_3
aligned_ETP2022_3.tif
aligned_ETP2022_8
aligned_ETP2022_8.tif
aligned_ETP2022_mean
aligned_ETP2022_mean.tif
aligned_evi
aligned_evi.tif
aligned_LS_Factor
aligned_LS-Factor.tif
aligned_lswi
aligned_lswi.tif
aligned_mndwi
aligned_mndwi.tif
aligned_ndmi
aligned_ndmi.tif
aligned_ndvi
aligned_ndvi.tif
aligned_ndwi
aligned_ndwi.tif
aligned_NIGHT2022
aligned_NIGHT2022.tif
aligned_pca_1
aligned_pca_1.tif
aligned_pca_2
aligned_pca_2.tif
aligned_Plan_Curvature
aligned_Plan Curvature.tif
aligned_PRE2022_3
aligned_PRE2022_3.tif
aligned_PRE2022_8
aligned_PRE2022_8.tif
aligned_PRE2022_mean
aligned_PRE2022_mean.tif
aligned_Profile_Curvature
aligned_Profile Curv

### 构造训练表


In [23]:
# Switch the arcpy workspace to the geodatabase holding the statistics tables
env.workspace = temp_table_gdb_path

In [24]:
# Get the list of tables in the current workspace
table_list = arcpy.ListTables()

In [25]:
# Key column shared by every statistics table; used as the join key below
line_fields = 'OBJECTID_1'
df_list = []
for one_table in table_list:
    # Load the full table into a DataFrame (the table must not contain nulls)
    df = pd.DataFrame(arcpy.da.TableToNumPyArray(one_table, "*"))
    # Keep the key column plus the last ten columns of the table.
    # NOTE(review): the fixed [-10:] slice assumes every table ends with exactly
    # ten statistic columns. Some tables also carry VARIETY/MAJORITY/MINORITY,
    # so their leading statistics may be dropped here. TODO confirm.
    merged_df = df[[line_fields, *df.columns[-10:]]]
    # Prefix each statistic column with the table name so columns stay unique
    merged_df.columns = [line_fields] + [f"{one_table}_{col}" for col in merged_df.columns[-10:]]
    df_list.append(merged_df)
# Seed the merge with the first frame...
df_merged = df_list[0]
# ...then outer-join the remaining frames onto it one at a time
for df in df_list[1:]:
    df_merged = df_merged.merge(df, on=line_fields, how='outer')

In [35]:
# Give the join key the same name as the polygon id column
df_merged = df_merged.rename(columns={'OBJECTID_1': 'OBJECTID'})

In [36]:
# Display the merged statistics table
df_merged

Unnamed: 0,OBJECTID,aligned_Analytical_Hillshading_COUNT,aligned_Analytical_Hillshading_AREA,aligned_Analytical_Hillshading_MIN,aligned_Analytical_Hillshading_MAX,aligned_Analytical_Hillshading_RANGE,aligned_Analytical_Hillshading_MEAN,aligned_Analytical_Hillshading_STD,aligned_Analytical_Hillshading_SUM,aligned_Analytical_Hillshading_MEDIAN,...,slope_postion_101_smooth_MAX,slope_postion_101_smooth_RANGE,slope_postion_101_smooth_MEAN,slope_postion_101_smooth_STD,slope_postion_101_smooth_SUM,slope_postion_101_smooth_VARIETY,slope_postion_101_smooth_MAJORITY,slope_postion_101_smooth_MINORITY,slope_postion_101_smooth_MEDIAN,slope_postion_101_smooth_PCT90
0,1,1466.0,36650.0,0.085114,1.426273,1.341159,1.009364,0.189264,1479.727245,1.029902,...,6,0,6.000000,0.000000,8796.0,1,6,6,6,6
1,2,1963.0,49075.0,0.553184,1.844513,1.291329,1.159355,0.201154,2275.814766,1.153229,...,5,0,5.000000,0.000000,9815.0,1,5,5,5,5
2,3,930.0,23250.0,0.468001,1.639667,1.171666,1.125067,0.163594,1046.312610,1.122798,...,6,0,6.000000,0.000000,5580.0,1,6,6,6,6
3,4,657.0,16425.0,0.344335,1.460095,1.115760,1.080594,0.215478,709.950295,1.128205,...,6,0,6.000000,0.000000,3942.0,1,6,6,6,6
4,5,426.0,10650.0,0.513177,1.330774,0.817597,0.950940,0.176512,405.100252,0.969034,...,6,0,6.000000,0.000000,2556.0,1,6,6,6,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186000,186127,89.0,2225.0,0.055052,1.072301,1.017249,0.521279,0.227089,46.393839,0.448386,...,5,0,5.000000,0.000000,445.0,1,5,5,5,5
186001,186128,56.0,1400.0,0.329370,0.969451,0.640081,0.713920,0.151047,39.979518,0.743747,...,5,1,4.964286,0.185577,278.0,2,5,4,5,5
186002,186129,20.0,500.0,0.385134,0.575790,0.190656,0.499267,0.053660,9.985333,0.505297,...,4,0,4.000000,0.000000,80.0,1,4,4,4,4
186003,186130,349.0,8725.0,0.059767,1.108773,1.049006,0.507475,0.199985,177.108794,0.475848,...,5,0,5.000000,0.000000,1745.0,1,5,5,5,5


In [27]:
# NOTE(review): self-assignment; this rebinds polygon_path to its current value and changes nothing
polygon_path = polygon_path

In [28]:
# Helper for adding a field
def add_field(input_table, field_name, field_type='DOUBLE'):
    """Add a field to a table through the ArcGIS Add Field tool.

    :param input_table: input data table
    :param field_name: name of the field to add
    :param field_type: field type (defaults to ``'DOUBLE'``)
    """
    arcpy.management.AddField(
        in_table=input_table,
        field_name=field_name,
        field_type=field_type,
    )

In [29]:

# Add polygon geometry attributes: new fields for the bounding-box
# coordinates and the centroid coordinates
for one_fields in ('XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y'):
    add_field(polygon_path, one_fields)


In [30]:
# Fill the bounding-box and centroid fields of every polygon
geometry_fields = ["SHAPE@", "XMin", "YMin", "XMax", "YMax", "Centroid_X", "Centroid_Y"]
with arcpy.da.UpdateCursor(polygon_path, geometry_fields) as cursor:
    for row in cursor:
        shape = row[0]
        if shape is None:
            # A feature with null geometry has no extent or centroid; leave its
            # fields unset rather than failing with AttributeError.
            continue
        extent = shape.extent
        centroid = shape.centroid
        # Same order as the field list after SHAPE@
        row[1:] = [
            extent.XMin,
            extent.YMin,
            extent.XMax,
            extent.YMax,
            centroid.X,
            centroid.Y,
        ]
        cursor.updateRow(row)

print("四至坐标计算完成并存储在新字段中。")


四至坐标计算完成并存储在新字段中。


In [31]:
# Field names of the polygon feature class after the new fields were added
polygon_fields_list = [field.name for field in arcpy.ListFields(polygon_path)]

In [32]:
# Print the collected field names
print(polygon_fields_list)

['OBJECTID', 'Shape', 'DLBM', 'DLMC', 'DLDM', 'MZMC', 'Shape_Length', 'Shape_Area', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']


In [33]:
# Read the id, attribute and centroid fields of the polygons into a DataFrame
polygon_df = pd.DataFrame(
    arcpy.da.FeatureClassToNumPyArray(
        polygon_path,
        ['OBJECTID', 'DLBM', 'DLMC', 'DLDM', 'MZMC', 'Centroid_X', 'Centroid_Y'],
    )
)

In [34]:
# Display the polygon attribute table
polygon_df

Unnamed: 0,OBJECTID,DLBM,DLMC,DLDM,MZMC,Centroid_X,Centroid_Y
0,1,0301,乔木林地,03,板岩,508927.958157,3.024905e+06
1,2,0301,乔木林地,03,板岩,508735.165800,3.024880e+06
2,3,0301,乔木林地,03,板岩,508411.483730,3.024868e+06
3,4,0307,其他林地,03,板岩,508161.635127,3.024848e+06
4,5,0301,乔木林地,03,板岩,508015.128353,3.024793e+06
...,...,...,...,...,...,...,...
186126,186127,0101,水田,01,泥(页)岩,545512.219367,2.963693e+06
186127,186128,0103,旱地,01,泥(页)岩,545312.538295,2.963672e+06
186128,186129,0305,灌木林地,03,泥(页)岩,545214.091124,2.963681e+06
186129,186130,0301,乔木林地,03,泥(页)岩,545185.208235,2.963506e+06


In [37]:
# Join the tables: keep every polygon row and attach its statistics by OBJECTID
result_df = polygon_df.merge(df_merged, on='OBJECTID', how='left')

In [38]:
# Polygon ids that have no row in the merged statistics table
polygon_ids = set(polygon_df['OBJECTID'])
stats_ids = set(df_merged['OBJECTID'])
missing_ids = polygon_ids - stats_ids
print(f"Missing OBJECTID: {missing_ids}")
print(len(missing_ids))

Missing OBJECTID: {175106, 10249, 75789, 113171, 123923, 18967, 140831, 156198, 16424, 171051, 40494, 31796, 185396, 178744, 76346, 115260, 134210, 126535, 161864, 91210, 5711, 104022, 67159, 180835, 127078, 71797, 97399, 83066, 117371, 174203, 75389, 106621, 75903, 150656, 172158, 174719, 182911, 62085, 3211, 68238, 101006, 113301, 28310, 66200, 182940, 79520, 174754, 150179, 16038, 97960, 60588, 43694, 152241, 73397, 69304, 70329, 144571, 33980, 35521, 98506, 63181, 51413, 82651, 21725, 121062, 78568, 74995, 25845, 84222, 169216, 134406, 3337, 2315, 52512, 144676, 80175, 182577, 50491, 41276, 59196, 159037, 169281, 99140, 65351, 78154, 182603, 20819, 35668, 1896, 182634, 93048, 123257, 61309, 49021, 67455, 16256, 70529, 142717, 176510, 128902, 171915, 102285, 136593, 100754, 148370, 48533, 31639, 144807, 46505, 55210, 117166, 52657, 131507, 105912, 67515, 65999, 174033, 26578, 4052, 3030, 24539, 143838, 66029, 40432, 82417, 45043}
126


In [39]:
# Row counts: polygons vs. merged statistics
print(len(polygon_df), len(df_merged))

186131 186005


In [40]:
# Select the numeric (float64) columns
numeric_cols = result_df.select_dtypes(include=[np.float64])

# Mean of each numeric column
mean_values = numeric_cols.mean()

# Fill the missing values of the numeric columns with those means
result_df[numeric_cols.columns] = numeric_cols.fillna(mean_values)

In [41]:
# Save the assembled table as CSV
train_data_path = r'F:\cache_data\zone_ana\sb\prediction_data'
# to_csv does not create missing folders, so make sure the target exists first
os.makedirs(train_data_path, exist_ok=True)
result_df.to_csv(os.path.join(train_data_path, 'result_20240905.csv'), index=False)