In [48]:
import arcpy
from arcpy import env
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
from pykrige.ok import OrdinaryKriging
from sklearn.metrics import r2_score

In [50]:
# 数组整形
def resize_arrays(A, B, fill_value=0):
    """调整数组形状一致"""
    new_shape = (max(A.shape[0], B.shape[0]), max(A.shape[1], B.shape[1]))

    if A.shape != new_shape:
        if A.shape[0] < new_shape[0]:
            padding_rows = new_shape[0] - A.shape[0]
            padding = np.full((padding_rows, A.shape[1]), fill_value)
            A = np.vstack((A, padding))
        elif A.shape[0] > new_shape[0]:
            A = A[:new_shape[0], :]

        if A.shape[1] < new_shape[1]:
            pad_width = ((0, 0), (0, new_shape[1] - A.shape[1]))
            A = np.pad(A, pad_width, mode='constant', constant_values=fill_value)
        elif A.shape[1] > new_shape[1]:
            A = A[:, :new_shape[1]]
    
    if B.shape != new_shape:
        if B.shape[0] < new_shape[0]:
            padding_rows = new_shape[0] - B.shape[0]
            padding = np.full((padding_rows, B.shape[1]), fill_value)
            B = np.vstack((B, padding))
        elif B.shape[0] > new_shape[0]:
            B = B[:new_shape[0], :]

        if B.shape[1] < new_shape[1]:
            pad_width = ((0, 0), (0, new_shape[1] - B.shape[1]))
            B = np.pad(B, pad_width, mode='constant', constant_values=fill_value)
        elif B.shape[1] > new_shape[1]:
            B = B[:, :new_shape[1]]
    
    return A, B


In [70]:
# predictor = TabularPredictor(label=label).fit(train_data,time_limit=600)
predictor = TabularPredictor.load(r"D:\ArcgisData\pred_organic\N_pred_moudle\zscore_normal")

In [52]:
# 设置工作环境
env.workspace = r"D:\ArcgisData\basedata\basetrain_30m.gdb"
arcpy.ListRasters()

['DEM',
 'TWI_5',
 'TPI_201',
 'TPI_101',
 'TPI_11',
 'TPI_3',
 'TMP',
 'SOILQS',
 'SLOP',
 'PRE',
 'NIGTH',
 'NDVI',
 'DZ',
 'DL',
 'CUR',
 'ASP',
 'PLCUR',
 'POCUR',
 'RESULT',
 'RESULT2']

In [53]:
# 过滤所用的特征因子
check_list = ['X', 'Y', 'N', 'ASP', 'DEM', 'DL', 'NDVI', 'NIGTH', 'PRE', 'SLOP', 'SOILQS', 'TMP', 'TPI_11', 'TPI_101','TPI_201','TPI_3','TWI_5','DZ']
feature_list = [_ for _ in arcpy.ListRasters() if _ in check_list ]
feature_list,len(feature_list),len(check_list)

(['DEM',
  'TWI_5',
  'TPI_201',
  'TPI_101',
  'TPI_11',
  'TPI_3',
  'TMP',
  'SOILQS',
  'SLOP',
  'PRE',
  'NIGTH',
  'NDVI',
  'DZ',
  'DL',
  'ASP'],
 15,
 18)

In [54]:
for one_raster in feature_list:
    print(one_raster,arcpy.RasterToNumPyArray(one_raster).shape)

DEM (2481, 1849)
TWI_5 (2481, 1849)
TPI_201 (2481, 1849)
TPI_101 (2481, 1849)
TPI_11 (2481, 1849)
TPI_3 (2481, 1849)
TMP (2481, 1849)
SOILQS (2481, 1849)
SLOP (2481, 1849)
PRE (2481, 1849)
NIGTH (2481, 1849)
NDVI (2481, 1849)
DZ (2481, 1792)
DL (2470, 1834)
ASP (2481, 1849)


In [55]:
dem_array = arcpy.RasterToNumPyArray("DEM")
dl_array = arcpy.RasterToNumPyArray("DL")
dz_array = arcpy.RasterToNumPyArray("DZ")

In [56]:
dl_array = resize_arrays(dem_array,dl_array,999)[1]
dz_array = resize_arrays(dem_array,dz_array,8)[1]
dl_array.shape,dz_array.shape

((2481, 1849), (2481, 1849))

In [None]:
# 构造flat
# ['X', 'Y', 'N', 'ASP', 'DEM', 'DL', 'NDVI', 'NIGTH', 'PRE', 'SLOP', 'SOILQS', 'TMP', 'TPI_11', 'TPI_101','TPI_201','TPI_3','TWI_5','DZ']

In [57]:

asp = arcpy.RasterToNumPyArray("ASP").flatten()
dem = arcpy.RasterToNumPyArray("DEM").flatten()
dl = dl_array.flatten()
ndvi = arcpy.RasterToNumPyArray("NDVI").flatten()
nigth = arcpy.RasterToNumPyArray("NIGTH").flatten()
pre = arcpy.RasterToNumPyArray("PRE").flatten()
slope = arcpy.RasterToNumPyArray("SLOP").flatten()
soilqs = arcpy.RasterToNumPyArray("SOILQS").flatten()
tmp = arcpy.RasterToNumPyArray("TMP").flatten()
tpi11 = arcpy.RasterToNumPyArray("TPI_11").flatten()
tpi101 = arcpy.RasterToNumPyArray("TPI_101").flatten()
tpi201 = arcpy.RasterToNumPyArray("TPI_201").flatten()
tpi3 = arcpy.RasterToNumPyArray("TPI_3").flatten()
twi5 = arcpy.RasterToNumPyArray("TWI_5").flatten()
dz = dz_array.flatten()


In [58]:
# 构造经纬度信息
desc = arcpy.Describe("DEM")
origin_x = desc.extent.XMin
origin_y = desc.extent.YMax
pixel_width = desc.meanCellWidth
pixel_height = desc.meanCellHeight
print(origin_x,origin_y,pixel_width,pixel_height)

397387.5 3153427.5 30.0 30.0


In [59]:
# 经度
array_x = np.zeros(dem_array.shape, dtype=np.float64)
array_x[:, 0] = 397387.5+15
for i in range(1, dem_array.shape[1]):
    array_x[:, i] = array_x[:, i-1] + 30
print(array_x.shape,array_x[0,1],array_x[0,-1])

(2481, 1849) 397432.5 452842.5


In [60]:
# 纬度
array_y = np.zeros(dem_array.shape,dtype=np.float64)
array_y[0] = 3153427.5+15
for i in range(1, dem_array.shape[0]):
    array_y[i] = array_y[i-1] - 30
print(array_y.shape,array_y[0][0],array_y[-1][0])

(2481, 1849) 3153442.5 3079042.5


In [61]:
x = array_x.flatten()
y = array_y.flatten()

In [62]:
features2 = np.column_stack((x,y,asp,dem,dl,ndvi,nigth,pre,slope,soilqs,tmp,tpi11,tpi101,tpi201,tpi3,twi5,dz))

In [63]:
features2.size

77985273

In [64]:
xulian_data = pd.DataFrame(features2,columns=['X','Y','ASP','DEM','DL','NDVI','NIGHT','PRE','SLOPE','SOILQS','TMP','TPI11','TPI101','TPI201','TPI3','TWI5','DZ'])

In [65]:
xulian_data.shape

(4587369, 17)

In [66]:
import os
out_path = r"D:\ArcgisData\pred_organic\N_out_table\cut_csv"
chunk_size = 400000
total_rows = xulian_data.shape[0]
for i in range(0, total_rows, chunk_size):
    start = i
    end = min(i + chunk_size, total_rows)
    filename =  os.path.join(out_path,f'data_chunk_{i}.csv') # 文件名格式可以根据您的需要进行修改
    df_chunk = xulian_data.iloc[start:end]
    df_chunk.to_csv(filename, index=False)
    print(i)



0
400000
800000
1200000
1600000
2000000
2400000
2800000
3200000
3600000
4000000
4400000


In [67]:
# 读取特征表
table_list = [os.path.join(out_path,_) for _ in os.listdir(out_path)]
table_list,len(table_list)

(['D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_0.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_1200000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_1600000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_2000000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_2400000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_2800000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_3200000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_3600000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_400000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_4000000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_4400000.csv',
  'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_800000.csv'],
 12)

In [68]:
# 排序列表

# 使用lambda函数将文件名按照最后一个下划线后面的数字大小进行排序
sorted_files = sorted(table_list, key=lambda x: int(x.rsplit('_', 1)[-1].split('.')[0]))
sorted_files

['D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_0.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_400000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_800000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_1200000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_1600000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_2000000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_2400000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_2800000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_3200000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_3600000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_4000000.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\cut_csv\\data_chunk_4400000.csv']

In [71]:
# 预测数据存储位置
result_path = r"D:\ArcgisData\pred_organic\N_out_table\pre_csv"

In [72]:
n = 0
for one_table in sorted_files:
    data_df = pd.read_csv(one_table)
    temp_pred = predictor.predict(data_df)
    temp_pred.to_csv(os.path.join(result_path,f"{n}.csv"))
    n+=1
    print(n)

  y_pred[:, j] = num / denom


1


  y_pred[:, j] = num / denom


2


  y_pred[:, j] = num / denom


3


  y_pred[:, j] = num / denom


4


  y_pred[:, j] = num / denom


5


  y_pred[:, j] = num / denom


6


  y_pred[:, j] = num / denom


7


  y_pred[:, j] = num / denom


8


  y_pred[:, j] = num / denom


9


  y_pred[:, j] = num / denom


10


  y_pred[:, j] = num / denom


11


  y_pred[:, j] = num / denom


12


In [73]:
# 读取预测结果
pre_csv_list = [os.path.join(result_path,_) for _ in os.listdir(result_path)]
pre_csv_list = sorted(pre_csv_list,key=lambda x:int(x.rsplit('\\', -1)[-1].split('.')[0]))
pre_csv_list

['D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\0.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\1.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\2.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\3.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\4.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\5.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\6.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\7.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\8.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\9.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\10.csv',
 'D:\\ArcgisData\\pred_organic\\N_out_table\\pre_csv\\11.csv']

In [74]:
pre_df = pd.read_csv(pre_csv_list[0])
for one_pred in pre_csv_list[1:]:
    temp_df = pd.read_csv(one_pred)
    pre_df = pd.concat([pre_df,temp_df],axis=0)
    print(one_pred)

D:\ArcgisData\pred_organic\N_out_table\pre_csv\1.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\2.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\3.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\4.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\5.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\6.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\7.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\8.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\9.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\10.csv
D:\ArcgisData\pred_organic\N_out_table\pre_csv\11.csv


In [75]:
# 保存完整的预测数据
pre_df.to_csv(os.path.join(r"D:\ArcgisData\pred_organic\N_out_table\merge_csv","result.csv"))

In [76]:
dem_array.shape,pre_df.size

((2481, 1849), 9174738)

In [77]:
pre_df.columns

Index(['Unnamed: 0', 'N'], dtype='object')

In [78]:
raster_array = np.reshape(pre_df['N'].values,dem_array.shape)
raster_array

array([[2.2934642, 2.2934575, 2.2934506, ..., 1.8250482, 1.8250508,
        1.8250535],
       [2.2934735, 2.2934668, 2.29346  , ..., 1.8250465, 1.8250492,
        1.8250518],
       [2.2934828, 2.293476 , 2.2934692, ..., 1.8250446, 1.8250475,
        1.8250501],
       ...,
       [1.9921412, 1.9921447, 1.9921482, ..., 1.8764555, 1.8764553,
        1.8764553],
       [1.9921442, 1.9921477, 1.9921513, ..., 1.8764565, 1.8764565,
        1.8764565],
       [1.9921472, 1.9921508, 1.9921542, ..., 1.8764577, 1.8764577,
        1.8764577]])

In [79]:
#
env.extent = "DEM"

In [81]:

# 将预测结果输出为栅格数据
out_raster = arcpy.NumPyArrayToRaster(
    raster_array,
    arcpy.Point(arcpy.env.extent.XMin, arcpy.env.extent.YMin),
    30,
    30,
)
out_raster.save("RESULT3")
print("完成")


完成
