In [1]:
import os
import arcpy
from arcpy import env
from arcpy.sa import *
from arcpy.da import *
import pandas as pd

In [2]:
def zonstatis(in_table, fields, value_raster, out_table,
              statistics_type='ALL', ignore_nodata='#'):
    """Summarize a raster within vector zones and write the result to a table.

    Thin wrapper around ``ZonalStatisticsAsTable``.

    :param in_table: zone dataset (the vector zones), passed as ``in_zone_data``
    :param fields: name of the field in ``in_table`` that identifies each zone
    :param value_raster: raster whose values are summarized per zone
    :param out_table: path of the output statistics table
    :param statistics_type: statistic(s) to compute; defaults to ``'ALL'``
    :param ignore_nodata: NoData option handed to the tool; the default ``'#'``
        is passed through unchanged
    :return: the value returned by ``ZonalStatisticsAsTable``
    """
    return ZonalStatisticsAsTable(
        in_zone_data=in_table,
        zone_field=fields,
        in_value_raster=value_raster,
        out_table=out_table,
        ignore_nodata=ignore_nodata,
        statistics_type=statistics_type,
    )

In [3]:
# Polygon feature class that supplies the zones for the zonal statistics.
polygon_path = r'D:\ArcGISProjects\workspace\shbyq\DZ.gdb\pre_sd_data'
# File geodatabase containing the value rasters (used below as the arcpy workspace).
raster_path = r"D:\ArcGISProjects\workspace\shbyq\feature_raster_file\features_data_dy.gdb"

In [4]:
# File geodatabase that receives one zonal statistics table per raster.
temp_table_gdb_path = r"F:\cache_data\zone_ana\dy\zone_temp.gdb"

In [5]:
# Start from an empty scratch geodatabase: delete the existing one (if any), then recreate it.
# Existence is checked up front instead of swallowing every exception, so a genuine
# delete failure (for example a lock on the geodatabase) is reported here.
if arcpy.Exists(temp_table_gdb_path):
    arcpy.management.Delete(temp_table_gdb_path)

# Derive the folder and the geodatabase name from the configured path so that the
# created geodatabase always matches temp_table_gdb_path.
temp_gdb_folder, temp_gdb_file = os.path.split(temp_table_gdb_path)
temp_gdb_name = os.path.splitext(temp_gdb_file)[0]
# Make sure the parent folder exists before creating the geodatabase inside it.
os.makedirs(temp_gdb_folder, exist_ok=True)
arcpy.management.CreateFileGDB(temp_gdb_folder, temp_gdb_name, "CURRENT")

In [6]:
# Switch the arcpy workspace to the raster geodatabase.
arcpy.env.workspace = raster_path

In [7]:
# Names of the rasters in the current workspace that will be processed.
# NOTE(review): ListRasters() is expected to return an empty list or None when the
# workspace is wrong or empty; normalize to a list and stop here with a clear message
# instead of failing later in the processing loop.
raster_list = arcpy.ListRasters() or []
if not raster_list:
    raise RuntimeError("No rasters found in workspace: {}".format(arcpy.env.workspace))

In [8]:
# Show the field names of the polygon feature class.
[field.name for field in arcpy.ListFields(polygon_path)]

['OBJECTID_1',
 'Shape',
 'FID_DY_SD',
 'OBJECTID',
 'DLBM',
 'DLMC',
 '母质',
 'Shape_Length',
 'Shape_Area']

In [9]:

# Run the zonal statistics once per raster; each output table is named after its raster.
for raster_name in raster_list:
    print(raster_name)
    stats_table_path = os.path.join(temp_table_gdb_path, raster_name)
    # Zones are identified by the polygons' 'OBJECTID' field.
    zonstatis(polygon_path, 'OBJECTID', raster_name, stats_table_path)

DEM
AnalyticalHillshading
Aspect
ChannelNetworkBaseLevel
ChannelNetworkDistance
ClosedDepressions
ConvergenceIndex
LSFactor
PlanCurvature
ProfileCurvature
RelativeSlopePosition
Slope
TopographicWetnessIndex
TotalCatchmentArea
ValleyDepth
Contrast
Correlation
Dissimilarity
Entropy
Homogeneity
Mean
ndvi
PCA_0
PCA_1
SecondMoment
Variance
PRE
SRA
TMP
VAP
WIND
BIO
LON
LAT
PH
DZ
DL
MRRTF
MRVBF


### 构造训练表


In [10]:
# Switch the arcpy workspace to the geodatabase holding the zonal statistics tables.
arcpy.env.workspace = temp_table_gdb_path

In [11]:
# Names of the tables in the current workspace.
# NOTE(review): ListTables() is expected to return an empty list or None when the
# workspace is wrong or empty; normalize to a list and stop here with a clear message
# instead of failing later when the tables are merged.
table_list = arcpy.ListTables() or []
if not table_list:
    raise RuntimeError("No tables found in workspace: {}".format(arcpy.env.workspace))

In [12]:
# Build one wide table: from every zonal statistics table keep the key field plus its
# trailing statistics columns (prefixed with the table name), then join all tables on the key.
line_fields = 'OBJECTID'
# Number of trailing columns of each table that are treated as statistics fields.
# NOTE(review): assumes every table ends with exactly 10 statistics columns; confirm for
# integer rasters, where statistics type 'ALL' may produce additional fields.
stat_field_count = 10

# Fail with a clear message instead of an IndexError on df_list[0] further down.
if not table_list:
    raise RuntimeError("No zonal statistics tables found in workspace: {}".format(arcpy.env.workspace))

df_list = []
for one_table in table_list:
    # Convert the table to a DataFrame; no null handling is configured here,
    # so the table is expected to contain no null values.
    table_df = pd.DataFrame(arcpy.da.TableToNumPyArray(one_table, "*"))
    stat_columns = list(table_df.columns[-stat_field_count:])
    # NOTE(review): presumably 'OBJECTID' in these tables is the zone id taken from the
    # polygons rather than the table's own row id; verify against one output table.
    table_stats = table_df[[line_fields] + stat_columns]
    # Prefix the statistics columns with the table name so they stay unique after merging.
    table_stats.columns = [line_fields] + [one_table + '_' + col for col in stat_columns]
    df_list.append(table_stats)

# Outer-join all per-table frames on the key field, starting from the first one.
df_merged = df_list[0]
for table_stats in df_list[1:]:
    df_merged = pd.merge(df_merged, table_stats, on=line_fields, how='outer')

In [13]:
# Display the merged per-zone statistics table.
df_merged

Unnamed: 0,OBJECTID,DEM_COUNT,DEM_AREA,DEM_MIN,DEM_MAX,DEM_RANGE,DEM_MEAN,DEM_STD,DEM_SUM,DEM_MEDIAN,...,MRVBF_COUNT,MRVBF_AREA,MRVBF_MIN,MRVBF_MAX,MRVBF_RANGE,MRVBF_MEAN,MRVBF_STD,MRVBF_SUM,MRVBF_MEDIAN,MRVBF_PCT90
0,1,73.0,1825.0,810.599976,831.500000,20.900024,820.541093,6.291782,59899.499756,819.900024,...,73.0,1825.0,0.004468,2.967779,2.963311,0.959029,0.736500,70.009127,0.875263,1.882469
1,2,128.0,3200.0,818.900024,833.500000,14.599976,825.126560,2.758637,105616.199707,825.099976,...,128.0,3200.0,0.006735,1.977468,1.970733,0.743168,0.583104,95.125493,0.593158,1.722624
2,3,16.0,400.0,814.200012,821.299988,7.099976,817.437500,1.950917,13079.000000,817.500000,...,16.0,400.0,0.000272,0.932592,0.932319,0.562656,0.331294,9.002499,0.675375,0.896028
3,4,18.0,450.0,960.299988,963.900024,3.600037,962.055552,1.146118,17316.999939,962.299988,...,18.0,450.0,0.181701,0.520207,0.338506,0.411477,0.102469,7.406594,0.459067,0.515043
4,5,17.0,425.0,901.500000,905.700012,4.200012,904.005874,1.029835,15368.099854,904.000000,...,17.0,425.0,0.190537,2.840893,2.650356,1.726206,0.885498,29.345510,1.961003,2.788571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115437,115438,24.0,600.0,974.900024,977.500000,2.599976,975.754166,0.786328,23418.099976,975.400024,...,24.0,600.0,4.839455,4.960456,0.121002,4.910208,0.040304,117.844982,4.896280,4.958958
115438,115439,58.0,1450.0,1101.699951,1111.099976,9.400024,1106.482759,2.493724,64176.000000,1106.500000,...,58.0,1450.0,0.448908,3.894851,3.445943,1.512004,1.161539,87.696221,0.931238,3.665490
115439,115440,64.0,1600.0,816.900024,841.799988,24.899963,831.176560,7.127055,53195.299866,831.799988,...,64.0,1600.0,0.125199,0.564125,0.438926,0.272038,0.099699,17.410435,0.260546,0.385905
115440,115441,202.0,5050.0,898.299988,937.099976,38.799988,917.914352,9.485084,185418.699158,918.149963,...,202.0,5050.0,0.000141,0.587743,0.587602,0.044742,0.076410,9.037945,0.012571,0.128567


In [14]:
# NOTE(review): self-assignment, i.e. a no-op; polygon_path keeps its current value.
polygon_path = polygon_path

In [15]:
# Thin wrapper around the AddField geoprocessing tool.
def add_field(input_table, field_name, field_type='DOUBLE'):
    """Add a field to a table.

    Args:
        input_table: table (or feature class) that receives the new field.
        field_name: name of the field to add.
        field_type: field type keyword; defaults to 'DOUBLE'.
    """
    arcpy.management.AddField(
        in_table=input_table,
        field_name=field_name,
        field_type=field_type,
    )

In [17]:

# Add polygon geometry attributes:
# new fields that will hold each polygon's bounding-box and centroid coordinates.
bbox_and_centroid_fields = ('XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y')
for new_field in bbox_and_centroid_fields:
    add_field(polygon_path, new_field)


In [18]:
# Fill the bounding-box and centroid fields from each feature's geometry.
geometry_fields = ["SHAPE@", "XMin", "YMin", "XMax", "YMax", "Centroid_X", "Centroid_Y"]
with arcpy.da.UpdateCursor(polygon_path, geometry_fields) as cursor:
    for row in cursor:
        shape = row[0]
        # A feature without geometry has nothing to measure: leave its fields untouched
        # instead of failing on an attribute access on None.
        if shape is None:
            continue
        extent = shape.extent
        centroid = shape.centroid
        row[1], row[2] = extent.XMin, extent.YMin
        row[3], row[4] = extent.XMax, extent.YMax
        row[5], row[6] = centroid.X, centroid.Y
        cursor.updateRow(row)

print("四至坐标计算完成并存储在新字段中。")


四至坐标计算完成并存储在新字段中。


In [19]:
# Field names of the polygon feature class after the new fields were added.
polygon_fields_list = [field.name for field in arcpy.ListFields(polygon_path)]

In [20]:
# Print the collected field names of the polygon feature class.
print(polygon_fields_list)

['OBJECTID_1', 'Shape', 'FID_DY_SD', 'OBJECTID', 'DLBM', 'DLMC', '母质', 'Shape_Length', 'Shape_Area', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']


In [22]:
# Read the selected attribute fields of the polygon feature class into a DataFrame.
polygon_columns = ['OBJECTID', 'DLMC', '母质', 'XMin', 'YMin', 'XMax', 'YMax', 'Centroid_X', 'Centroid_Y']
polygon_records = arcpy.da.FeatureClassToNumPyArray(polygon_path, polygon_columns)
polygon_df = pd.DataFrame(polygon_records)

In [23]:
# Display the polygon attribute table.
polygon_df

Unnamed: 0,OBJECTID,DLMC,母质,XMin,YMin,XMax,YMax,Centroid_X,Centroid_Y
0,1,水田,碳酸岩,453415.7591,2.909007e+06,453504.8993,2.909054e+06,453460.380336,2.909038e+06
1,2,水田,碳酸岩,453745.4063,2.909096e+06,453833.4091,2.909244e+06,453776.184840,2.909171e+06
2,3,水田,碳酸岩,453397.2270,2.909322e+06,453426.3763,2.909352e+06,453414.506759,2.909337e+06
3,4,水田,碳酸岩,453172.7335,2.903237e+06,453194.5528,2.903281e+06,453180.794689,2.903264e+06
4,5,水田,碳酸岩,453040.1877,2.903039e+06,453105.8702,2.903078e+06,453076.497652,2.903056e+06
...,...,...,...,...,...,...,...,...,...
140116,71782,乔木林地,泥(页)岩,452221.7730,2.892018e+06,452321.4983,2.892221e+06,452271.635600,2.892061e+06
140117,71782,乔木林地,泥(页)岩,452234.2057,2.891939e+06,452675.8087,2.892295e+06,452444.764127,2.892138e+06
140118,71784,乔木林地,泥(页)岩,452234.2057,2.891939e+06,452675.8087,2.892295e+06,452444.764127,2.892138e+06
140119,74834,乔木林地,砂岩,441506.1327,2.876965e+06,441558.2704,2.876986e+06,441532.863852,2.876972e+06


In [24]:
# Left-join the zonal statistics onto the polygon attributes by 'OBJECTID',
# keeping every polygon row even when no statistics row matches.
result_df = polygon_df.merge(df_merged, how='left', on='OBJECTID')

In [25]:
# Display the joined table (polygon attributes plus zonal statistics).
result_df

Unnamed: 0,OBJECTID,DLMC,母质,XMin,YMin,XMax,YMax,Centroid_X,Centroid_Y,DEM_COUNT,...,MRVBF_COUNT,MRVBF_AREA,MRVBF_MIN,MRVBF_MAX,MRVBF_RANGE,MRVBF_MEAN,MRVBF_STD,MRVBF_SUM,MRVBF_MEDIAN,MRVBF_PCT90
0,1,水田,碳酸岩,453415.7591,2.909007e+06,453504.8993,2.909054e+06,453460.380336,2.909038e+06,73.0,...,73.0,1825.0,0.004468,2.967779,2.963311,0.959029,0.736500,70.009127,0.875263,1.882469
1,2,水田,碳酸岩,453745.4063,2.909096e+06,453833.4091,2.909244e+06,453776.184840,2.909171e+06,128.0,...,128.0,3200.0,0.006735,1.977468,1.970733,0.743168,0.583104,95.125493,0.593158,1.722624
2,3,水田,碳酸岩,453397.2270,2.909322e+06,453426.3763,2.909352e+06,453414.506759,2.909337e+06,16.0,...,16.0,400.0,0.000272,0.932592,0.932319,0.562656,0.331294,9.002499,0.675375,0.896028
3,4,水田,碳酸岩,453172.7335,2.903237e+06,453194.5528,2.903281e+06,453180.794689,2.903264e+06,18.0,...,18.0,450.0,0.181701,0.520207,0.338506,0.411477,0.102469,7.406594,0.459067,0.515043
4,5,水田,碳酸岩,453040.1877,2.903039e+06,453105.8702,2.903078e+06,453076.497652,2.903056e+06,17.0,...,17.0,425.0,0.190537,2.840893,2.650356,1.726206,0.885498,29.345510,1.961003,2.788571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140116,71782,乔木林地,泥(页)岩,452221.7730,2.892018e+06,452321.4983,2.892221e+06,452271.635600,2.892061e+06,42.0,...,42.0,1050.0,0.000314,0.484365,0.484050,0.058637,0.086601,2.462733,0.034503,0.139777
140117,71782,乔木林地,泥(页)岩,452234.2057,2.891939e+06,452675.8087,2.892295e+06,452444.764127,2.892138e+06,42.0,...,42.0,1050.0,0.000314,0.484365,0.484050,0.058637,0.086601,2.462733,0.034503,0.139777
140118,71784,乔木林地,泥(页)岩,452234.2057,2.891939e+06,452675.8087,2.892295e+06,452444.764127,2.892138e+06,54.0,...,54.0,1350.0,0.005645,0.421874,0.416229,0.113402,0.108836,6.123722,0.070846,0.276586
140119,74834,乔木林地,砂岩,441506.1327,2.876965e+06,441558.2704,2.876986e+06,441532.863852,2.876972e+06,601.0,...,601.0,15025.0,0.000058,2.747128,2.747071,0.305597,0.503204,183.663880,0.108120,0.699705


In [26]:
# Save the joined table as a CSV file.
train_data_path = r'F:\cache_data\zone_ana\dy\prediction_data'
# Create the output folder first so that to_csv does not fail when it is missing.
os.makedirs(train_data_path, exist_ok=True)
result_df.to_csv(os.path.join(train_data_path, 'result_20240618.csv'), index=False)