In [13]:
import arcpy
from arcpy import env
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
from pykrige.ok import OrdinaryKriging
from sklearn.metrics import r2_score

In [14]:
# 数组整形
def resize_arrays(A, B, fill_value=0):
    """调整数组形状一致"""
    new_shape = (max(A.shape[0], B.shape[0]), max(A.shape[1], B.shape[1]))

    if A.shape != new_shape:
        if A.shape[0] < new_shape[0]:
            padding_rows = new_shape[0] - A.shape[0]
            padding = np.full((padding_rows, A.shape[1]), fill_value)
            A = np.vstack((A, padding))
        elif A.shape[0] > new_shape[0]:
            A = A[:new_shape[0], :]

        if A.shape[1] < new_shape[1]:
            pad_width = ((0, 0), (0, new_shape[1] - A.shape[1]))
            A = np.pad(A, pad_width, mode='constant', constant_values=fill_value)
        elif A.shape[1] > new_shape[1]:
            A = A[:, :new_shape[1]]
    
    if B.shape != new_shape:
        if B.shape[0] < new_shape[0]:
            padding_rows = new_shape[0] - B.shape[0]
            padding = np.full((padding_rows, B.shape[1]), fill_value)
            B = np.vstack((B, padding))
        elif B.shape[0] > new_shape[0]:
            B = B[:new_shape[0], :]

        if B.shape[1] < new_shape[1]:
            pad_width = ((0, 0), (0, new_shape[1] - B.shape[1]))
            B = np.pad(B, pad_width, mode='constant', constant_values=fill_value)
        elif B.shape[1] > new_shape[1]:
            B = B[:, :new_shape[1]]
    
    return A, B


In [15]:
# predictor = TabularPredictor(label=label).fit(train_data,time_limit=600)
predictor = TabularPredictor.load(r"D:\ArcgisData\pred_ph\ph_pred_moudle\normal3")

In [16]:
predictor.feature_metadata_in.get_features()

['TWI5',
 'TPI201',
 'TMP',
 'SLOP',
 'PRE',
 'NIGTH',
 'NDVI',
 'DZ',
 'DL',
 'LON',
 'LAT']

In [17]:
# 设置工作环境
env.workspace = r"D:\ArcgisData\basedata\basetrain_30m.gdb"
arcpy.ListRasters()

['DEM',
 'TWI_5',
 'TPI_201',
 'TPI_101',
 'TPI_11',
 'TPI_3',
 'TMP',
 'SOILQS',
 'SLOP',
 'PRE',
 'NIGTH',
 'NDVI',
 'CUR',
 'ASP',
 'PLCUR',
 'POCUR',
 'OSJL',
 'LAT',
 'LON',
 'DZ',
 'DL']

In [18]:
# 过滤所用的特征因子
check_list = ['TWI_5', 'TPI_201', 'TMP', 'SLOP', 'PRE', 'NIGTH', 'NDVI', 'DZ', 'DL']
feature_list = [_ for _ in arcpy.ListRasters() if _ in check_list ]
feature_list,len(feature_list),len(check_list)

(['TWI_5', 'TPI_201', 'TMP', 'SLOP', 'PRE', 'NIGTH', 'NDVI', 'DZ', 'DL'], 9, 9)

In [19]:
for one_raster in feature_list:
    print(one_raster,arcpy.RasterToNumPyArray(one_raster).shape)

TWI_5 (2481, 1849)
TPI_201 (2481, 1849)
TMP (2481, 1849)
SLOP (2481, 1849)
PRE (2481, 1849)
NIGTH (2481, 1849)
NDVI (2481, 1849)
DZ (2481, 1849)
DL (2481, 1849)


In [21]:
dem_array = arcpy.RasterToNumPyArray("DEM")
dl_array = arcpy.RasterToNumPyArray("DL")
dz_array = arcpy.RasterToNumPyArray("DZ")

In [22]:
dz_array = resize_arrays(dem_array,dz_array,8)[1]
dl_array = resize_arrays(dem_array,dz_array,9)[1]
dz_array.shape,dl_array.shape

((2481, 1849), (2481, 1849))

In [23]:
#(['TWI_5', 'TPI_201', 'TMP', 'SLOP', 'PRE', 'NIGTH', 'NDVI', 'DZ'], 8, 8)
# asp = arcpy.RasterToNumPyArray("ASP").flatten()
# dem = arcpy.RasterToNumPyArray("DEM").flatten()
dl = dl_array.flatten()
ndvi = arcpy.RasterToNumPyArray("NDVI").flatten()
night = arcpy.RasterToNumPyArray("NIGTH").flatten()
pre = arcpy.RasterToNumPyArray("PRE").flatten()
slope = arcpy.RasterToNumPyArray("SLOP").flatten()
# soilqs = arcpy.RasterToNumPyArray("SOILQS").flatten()
tmp = arcpy.RasterToNumPyArray("TMP").flatten()
# tpi11 = arcpy.RasterToNumPyArray("TPI_11").flatten()
# tpi101 = arcpy.RasterToNumPyArray("TPI_101").flatten()
tpi201 = arcpy.RasterToNumPyArray("TPI_201").flatten()
# tpi3 = arcpy.RasterToNumPyArray("TPI_3").flatten()
twi5 = arcpy.RasterToNumPyArray("TWI_5").flatten()
dz = dz_array.flatten()


In [24]:
np.min(arcpy.RasterToNumPyArray("NDVI"))

0.0

In [25]:
# 构造经纬度信息
desc = arcpy.Describe("DEM")
origin_x = desc.extent.XMin
origin_y = desc.extent.YMax
pixel_width = desc.meanCellWidth
pixel_height = desc.meanCellHeight
print(origin_x,origin_y,pixel_width,pixel_height)

397387.5 3153427.5 30.0 30.0


In [26]:
# 经度
array_x = np.zeros(dem_array.shape, dtype=np.float32)
array_x[:, 0] = 397387.5+(pixel_width/2)
for i in range(1, dem_array.shape[1]):
    array_x[:, i] = array_x[:, i-1] + pixel_width
print(array_x.shape,array_x[0,1],array_x[0,-1])

(2481, 1849) 397432.5 452842.5


In [27]:
# 纬度
array_y = np.zeros(dem_array.shape,dtype=np.float32)
array_y[0] = 3153427.5+(pixel_height/2)
for i in range(1, dem_array.shape[0]):
    array_y[i] = array_y[i-1] - pixel_height
print(array_y.shape,array_y[0][0],array_y[-1][0])

(2481, 1849) 3153442.5 3079042.5


In [28]:
x = array_x.flatten()
y = array_y.flatten()

In [29]:
# features2 = np.column_stack((x,y,asp,dem,dl,ndvi,nigth,pre,slope,soilqs,tmp,tpi11,tpi101,tpi201,tpi3,twi5,dz))
features2 = np.column_stack((twi5,tpi201,tmp,slope,pre,night,ndvi,dz,dl,x,y))

In [30]:
features2.size

50461059

In [31]:
features2[300000]

array([3.9965148e+00, 4.2540405e+01, 1.2275000e+02, 1.5501679e+01,
       9.1250000e+02, 1.6000000e-01, 3.2400000e+02, 6.0000000e+00,
       6.0000000e+00, 4.1126250e+05, 3.1485825e+06], dtype=float32)

In [32]:
# xulian_data = pd.DataFrame(features2,columns=['X','Y','ASP','DEM','DL','NDVI','NIGHT','PRE','SLOPE','SOILQS','TMP','TPI11','TPI101','TPI201','TPI3','TWI5','DZ'])

xulian_data = pd.DataFrame(features2,columns=predictor.feature_metadata_in.get_features())

In [33]:
xulian_data.shape

(4587369, 11)

In [34]:
xulian_data.dtypes

TWI5      float32
TPI201    float32
TMP       float32
SLOP      float32
PRE       float32
NIGTH     float32
NDVI      float32
DZ        float32
DL        float32
LON       float32
LAT       float32
dtype: object

In [35]:
xulian_data['DL'] = xulian_data['DL'].astype(str)
xulian_data['DZ'] = xulian_data['DZ'].astype(str)

In [36]:
xulian_data.dtypes

TWI5      float32
TPI201    float32
TMP       float32
SLOP      float32
PRE       float32
NIGTH     float32
NDVI      float32
DZ         object
DL         object
LON       float32
LAT       float32
dtype: object

In [37]:
xulian_data.describe()

Unnamed: 0,TWI5,TPI201,TMP,SLOP,PRE,NIGTH,NDVI,LON,LAT
count,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0
mean,2.95759,0.05391708,91.0307,16.41409,517.5721,0.2461364,1472.668,425122.6,3116243.0
std,2.687172,37.9169,72.24962,17.07998,408.3257,1.025573,2307.893,16012.81,21486.09
min,-0.04508924,-321.9223,0.0,0.0,0.0,0.0,0.0,397402.5,3079042.0
25%,0.0,-9.991333,0.0,0.0,0.0,0.0,0.0,411262.5,3097642.0
50%,3.428443,0.0,136.9167,13.08498,821.1667,0.22,183.0,425122.5,3116242.0
75%,4.885564,9.828369,152.25,29.74471,839.9167,0.28,2311.0,438982.5,3134842.0
max,25.08574,250.0112,169.6667,84.59243,932.6667,44.69,9773.0,452842.5,3153442.0


In [38]:
import os
out_path = r"D:\ArcgisData\pred_ph\out_table_30m\cut_csv"
chunk_size = 400000
total_rows = xulian_data.shape[0]
for i in range(0, total_rows, chunk_size):
    start = i
    end = min(i + chunk_size, total_rows)
    filename =  os.path.join(out_path,f'data_chunk_{i}.csv') # 文件名格式可以根据您的需要进行修改
    df_chunk = xulian_data.iloc[start:end]
    df_chunk.to_csv(filename, index=False)
    print(i)



0
400000
800000
1200000
1600000
2000000
2400000
2800000
3200000
3600000
4000000
4400000


In [39]:
# 读取特征表
table_list = [os.path.join(out_path,_) for _ in os.listdir(out_path)]
table_list,len(table_list)

(['D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_0.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_1200000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_1600000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_2000000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_2400000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_2800000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_3200000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_3600000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_400000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_4000000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_4400000.csv',
  'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_800000.csv'],
 12)

In [40]:
# 排序列表

# 使用lambda函数将文件名按照最后一个下划线后面的数字大小进行排序
sorted_files = sorted(table_list, key=lambda x: int(x.rsplit('_', 1)[-1].split('.')[0]))
sorted_files

['D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_0.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_400000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_800000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_1200000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_1600000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_2000000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_2400000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_2800000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_3200000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_3600000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_4000000.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\cut_csv\\data_chunk_4400000.csv']

In [41]:
# 预测数据存储位置
result_path = r"D:\ArcgisData\pred_ph\out_table_30m\pre_csv"

In [42]:
n = 0
for one_table in sorted_files:
    data_df = pd.read_csv(one_table)
    temp_pred = predictor.predict(data_df)
    temp_pred.to_csv(os.path.join(result_path,f"{n}.csv"))
    n+=1
    print(n)

1
2
3
4
5
6
7
8
9
10
11
12


In [43]:
# 读取预测结果
pre_csv_list = [os.path.join(result_path,_) for _ in os.listdir(result_path)]
pre_csv_list = sorted(pre_csv_list,key=lambda x:int(x.rsplit('\\', -1)[-1].split('.')[0]))
pre_csv_list

['D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\0.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\1.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\2.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\3.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\4.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\5.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\6.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\7.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\8.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\9.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\10.csv',
 'D:\\ArcgisData\\pred_ph\\out_table_30m\\pre_csv\\11.csv']

In [44]:
pre_df = pd.read_csv(pre_csv_list[0])
for one_pred in pre_csv_list[1:]:
    temp_df = pd.read_csv(one_pred)
    pre_df = pd.concat([pre_df,temp_df],axis=0)
    print(one_pred)

D:\ArcgisData\pred_ph\out_table_30m\pre_csv\1.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\2.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\3.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\4.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\5.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\6.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\7.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\8.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\9.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\10.csv
D:\ArcgisData\pred_ph\out_table_30m\pre_csv\11.csv


In [45]:
# 保存完整的预测数据
pre_df.to_csv(os.path.join(r"D:\ArcgisData\pred_ph\out_table_30m\merge_csv","result.csv"))

In [46]:
dem_array.shape,pre_df.size,len(pre_df)

((2481, 1849), 9174738, 4587369)

In [47]:
pre_df.columns

Index(['Unnamed: 0', 'Ph'], dtype='object')

In [48]:
pre_df.describe()

Unnamed: 0.1,Unnamed: 0,Ph
count,4587369.0,4587369.0
mean,195657.1,5.986626
std,115546.7,0.4246971
min,0.0,5.180439
25%,95570.0,5.550124
50%,191483.0,5.987164
75%,295741.0,6.350526
max,399999.0,7.254052


In [49]:
raster_array = np.reshape(pre_df['Ph'].values,dem_array.shape)
raster_array

array([[5.571062 , 5.5710554, 5.5710487, ..., 5.497963 , 5.497978 ,
        5.4979935],
       [5.571062 , 5.5710554, 5.5710487, ..., 5.497943 , 5.497958 ,
        5.4979744],
       [5.571061 , 5.5710545, 5.571048 , ..., 5.4979234, 5.4979386,
        5.497955 ],
       ...,
       [5.7387757, 5.7387867, 5.7387967, ..., 5.46234  , 5.4622617,
        5.462184 ],
       [5.738775 , 5.7387853, 5.7387958, ..., 5.4626155, 5.4625363,
        5.4624577],
       [5.738774 , 5.7387834, 5.738794 , ..., 5.462891 , 5.462811 ,
        5.4627314]])

In [50]:
# 
env.extent = "DEM"

In [51]:
# 掩膜提取
def mask_raster(array,mask_ele,cell_size):
    out_raster = arcpy.NumPyArrayToRaster(
    array,
    arcpy.Point(arcpy.env.extent.XMin, arcpy.env.extent.YMin),
    cell_size,
    cell_size,
)
    """按掩膜提取栅格,空间参考设定为:CGCS2000_3_Degree_GK_CM_108E"""
    output_coordinate_system = arcpy.Describe(mask_ele).spatialReference
    with arcpy.EnvManager(outputCoordinateSystem=output_coordinate_system,snapRaster=mask_ele, cellSize=mask_ele):
        result_raster = arcpy.sa.ExtractByMask(out_raster, mask_ele, "INSIDE")
        return result_raster

In [52]:
# 按掩膜处理
result_path = r"D:\ArcgisData\pred_ph\PH_BaseData.gdb"
result_raster = mask_raster(raster_array,"DEM", 30)
result_raster.save(os.path.join(result_path,"RESULT_PH_30"))
print("完成")

完成
