In [1]:
import arcpy
from arcpy import env
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
from pykrige.ok import OrdinaryKriging
from sklearn.metrics import r2_score

In [51]:
# Array resizing
def _pad_to_shape(arr, target_shape, fill_value):
    """Pad a 2-D array at its bottom and right edges up to ``target_shape``.

    ``target_shape`` must be at least as large as ``arr.shape`` in both
    dimensions; an array that already has the target shape is returned as-is.
    """
    missing_rows = target_shape[0] - arr.shape[0]
    if missing_rows > 0:
        # Extra rows are appended below the existing data.
        filler = np.full((missing_rows, arr.shape[1]), fill_value)
        arr = np.vstack((arr, filler))

    missing_cols = target_shape[1] - arr.shape[1]
    if missing_cols > 0:
        # Extra columns are appended to the right of the existing data.
        pad_width = ((0, 0), (0, missing_cols))
        arr = np.pad(arr, pad_width, mode='constant', constant_values=fill_value)

    return arr


def resize_arrays(A, B, fill_value=0):
    """Pad two 2-D arrays so that both end up with the same shape.

    The common shape is the element-wise maximum of the two input shapes, so
    neither array is ever cropped; the smaller one is only extended at its
    bottom and right edges.

    Parameters
    ----------
    A, B : numpy.ndarray
        Two-dimensional arrays (both ``shape[0]`` and ``shape[1]`` are read).
    fill_value : scalar, optional
        Value written into every padded cell. Defaults to 0.

    Returns
    -------
    tuple of numpy.ndarray
        ``(A_resized, B_resized)``. An input that already has the common
        shape is returned unchanged (the same object, not a copy).
    """
    new_shape = (max(A.shape[0], B.shape[0]), max(A.shape[1], B.shape[1]))
    return (
        _pad_to_shape(A, new_shape, fill_value),
        _pad_to_shape(B, new_shape, fill_value),
    )


In [None]:
# Read the input table from CSV and display it.
data = pd.read_csv('../data/test_ph.csv')
data

In [None]:
# Drop every row that contains a missing value.
# `data` is rebound to the cleaned frame instead of being mutated with
# inplace=True, so the frame displayed by the loading cell is not silently
# altered behind the reader's back.
data = data.dropna()

In [None]:
# Split the table into training and test sets.
train_data = data.sample(frac=0.7,random_state=0)   # 70% of the rows, sampled with a fixed seed, are used for training
test_data = data.drop(train_data.index)                 # the remaining rows (about 30%) are used for testing
# Display one randomly chosen training row.
train_data.sample()

In [None]:
# Name of the target column, followed by summary statistics of that column
# in the training split.
label = 'ph'
train_data[label].describe()

In [2]:
# Fitting call, kept commented out; the cell loads a saved predictor instead.
# NOTE(review): presumably the saved model below was produced by this fit call — confirm.
# predictor = TabularPredictor(label=label).fit(train_data,time_limit=600)
predictor = TabularPredictor.load("./AutogluonModels/ag-20230731_030007/")

In [None]:
# Test set: predict from the test features (label column removed).
y_test_pred = predictor.predict(test_data.drop(columns=[label]))

In [None]:
# Training set: predict from the training features (label column removed),
# and keep the true label values of both splits for later comparison.
y_train_pred = predictor.predict(train_data.drop(columns=[label]))
y_train = train_data[label]
y_test = test_data[label]

In [None]:
# Compute residuals (true value minus prediction).
# NOTE(review): despite the `_test` suffix, these residuals are computed on the
# TRAINING split (y_train - y_train_pred).
residuals_test =y_train - y_train_pred
residuals_test

In [None]:
# Disabled residual-kriging step (kept for reference).
# NOTE(review): the last commented line adds the kriged residual to y_test (the
# true labels) rather than to y_test_pred — fix before re-enabling.
# # Gaussian kriging
# OK = OrdinaryKriging(train_data['X'], train_data['Y'], residuals_test, variogram_model='gaussian')  # Gaussian variogram model
# # Predict on the test data
# kriging_predictions_test, _ = OK.execute('points', test_data['X'], test_data['Y'])
# predictions_test = y_test + kriging_predictions_test

# Compute R2 of the model predictions on the test split (no kriging correction applied).
r2 = r2_score(y_test, y_test_pred)
r2

In [52]:
# Set the working environment (a hard-coded absolute path to a local
# geodatabase) and list the rasters it contains.
env.workspace = r"D:\ArcgisProject\result.gdb"
arcpy.ListRasters()

['SY_TWI_5',
 'SY_TPI_201',
 'SY_TPI_101',
 'SY_TPI_11',
 'SY_TPI_3',
 'SY_TMP_5',
 'SY_SLOP_5',
 'SY_PRE_5',
 'SY_NDVI_5',
 'SY_DZ_5',
 'SY_DEM_5',
 'SY_CUR_5',
 'SY_ASP_5',
 'plan_curve',
 'profile_curve',
 'SY_DLTB_RASTER']

In [53]:
# Print the array shape of every raster in the workspace. Each raster is read
# into a NumPy array just to obtain its shape.
for one_raster in arcpy.ListRasters():
    print(one_raster,arcpy.RasterToNumPyArray(one_raster).shape)

SY_TWI_5 (14884, 11094)
SY_TPI_201 (14884, 11094)
SY_TPI_101 (14884, 11094)
SY_TPI_11 (14884, 11094)
SY_TPI_3 (14884, 11094)
SY_TMP_5 (14884, 11094)
SY_SLOP_5 (14884, 11094)
SY_PRE_5 (14884, 11094)
SY_NDVI_5 (14884, 11094)
SY_DZ_5 (14884, 10750)
SY_DEM_5 (14884, 11094)
SY_CUR_5 (14884, 11094)
SY_ASP_5 (14884, 11094)
plan_curve (14884, 11094)
profile_curve (14884, 11094)
SY_DLTB_RASTER (14815, 11001)


In [54]:
# Read three rasters into NumPy arrays. dem_array serves as the reference grid
# for the resizing and coordinate cells that follow.
dem_array = arcpy.RasterToNumPyArray("SY_DEM_5")
dl_array = arcpy.RasterToNumPyArray("SY_DLTB_RASTER")
dz_array = arcpy.RasterToNumPyArray("SY_DZ_5")

In [55]:
# Pad dl_array and dz_array to the common shape they share with dem_array;
# only the second returned array (the padded raster) is kept. Padded cells are
# filled with 999 and 8 respectively.
# NOTE(review): padding only at the bottom/right assumes these rasters share
# the DEM's upper-left origin — confirm against the raster extents.
dl_array = resize_arrays(dem_array,dl_array,999)[1]
dz_array = resize_arrays(dem_array,dz_array,8)[1]
dl_array.shape,dz_array.shape

((14884, 11094), (14884, 11094))

In [59]:
# Read each covariate raster and flatten it to a 1-D array, so that every
# raster cell becomes one element. dz and dl reuse the padded arrays built
# above instead of re-reading their rasters.
twi5_flat = arcpy.RasterToNumPyArray("SY_TWI_5").flatten()
tpi201_flat = arcpy.RasterToNumPyArray("SY_TPI_201").flatten()
tpi101_flat = arcpy.RasterToNumPyArray("SY_TPI_101").flatten()
tpi11_flat = arcpy.RasterToNumPyArray("SY_TPI_11").flatten()
tmp5_flat = arcpy.RasterToNumPyArray("SY_TMP_5").flatten()
slope_flat = arcpy.RasterToNumPyArray("SY_SLOP_5").flatten()
pre_flat = arcpy.RasterToNumPyArray("SY_PRE_5").flatten()
ndvi_flat = arcpy.RasterToNumPyArray("SY_NDVI_5").flatten()
dz_flat = dz_array.flatten()
dem_flat = arcpy.RasterToNumPyArray("SY_DEM_5").flatten()
cur_flat = arcpy.RasterToNumPyArray("SY_CUR_5").flatten()
asp_flat = arcpy.RasterToNumPyArray("SY_ASP_5").flatten()
plcur_flat = arcpy.RasterToNumPyArray("plan_curve").flatten()
pocur_flat = arcpy.RasterToNumPyArray("profile_curve").flatten()
dl_flat = dl_array.flatten()

In [61]:
# Gather the information needed to build per-cell coordinates: the extent's
# minimum X and maximum Y of the DEM raster, plus its mean cell width/height.
desc = arcpy.Describe("SY_DEM_5")
origin_x = desc.extent.XMin
origin_y = desc.extent.YMax
pixel_width = desc.meanCellWidth
pixel_height = desc.meanCellHeight
print(origin_x,origin_y,pixel_width,pixel_height)

397387.5 3153427.5 5.0 5.0


In [64]:
# X coordinate of every cell on the DEM grid.
# The origin and cell width come from the arcpy.Describe cell above (origin_x,
# pixel_width) instead of being repeated as literals, and the column values are
# computed in one vectorised step rather than accumulated column by column in
# float32.
# NOTE(review): origin_x is extent.XMin, so column 0 gets the extent's minimum
# X rather than a cell-centre value (XMin + 0.5 * cell width) — confirm which
# convention the training 'X' column uses.
col_x = origin_x + np.arange(dem_array.shape[1], dtype=np.float64) * pixel_width
array_x = np.empty(dem_array.shape, dtype=np.float32)
array_x[:] = col_x  # broadcast the single row of X values down all rows
print(array_x.shape,array_x[0,1],array_x[0,-1])

(14884, 11094) 397392.5 452852.5


In [63]:
# Y coordinate of every cell on the DEM grid.
# The origin and cell height come from the arcpy.Describe cell above (origin_y,
# pixel_height) instead of being repeated as literals, and the row values are
# computed in one vectorised step rather than accumulated row by row in float32.
# Y decreases with the row index because row 0 starts at extent.YMax.
# NOTE(review): origin_y is extent.YMax, so row 0 gets the extent's maximum Y
# rather than a cell-centre value (YMax - 0.5 * cell height) — confirm which
# convention the training 'Y' column uses.
row_y = origin_y - np.arange(dem_array.shape[0], dtype=np.float64) * pixel_height
array_y = np.empty(dem_array.shape, dtype=np.float32)
array_y[:] = row_y[:, np.newaxis]  # broadcast the single column of Y values across all columns
print(array_y.shape,array_y[0][0],array_y[-1][0])

(14884, 11094) 3153427.5 3079012.5


In [65]:
# Flatten the coordinate grids to 1-D, the same way as the covariate arrays.
array_x_flat = array_x.flatten()
array_y_flat = array_y.flatten()

In [66]:
# Stack the 17 flattened arrays as the columns of one 2-D feature matrix
# (one row per raster cell).
features2 = np.column_stack((array_x_flat,array_y_flat,twi5_flat,tpi201_flat,tpi101_flat,tpi11_flat,tmp5_flat,slope_flat,pre_flat,ndvi_flat,dz_flat,dem_flat,cur_flat,asp_flat,plcur_flat,pocur_flat,dl_flat))

In [67]:
# Total number of elements in the feature matrix (rows x columns).
features2.size

2807092632

In [68]:
# Wrap the feature matrix in a DataFrame; the column names follow the order of
# the arrays passed to np.column_stack.
# NOTE(review): this variable is spelled `xulian_data`, whereas a later cell
# defines a different variable `xunlian_data` — confirm which one is intended.
xulian_data = pd.DataFrame(features2,columns=['X','Y','twi5','tpi201','tpi101','tpi11','tmp','slope','pre','ndvi','dz','dem','cur','asp','plcur','pocur','dl'])

In [None]:
# Write the full feature table to a CSV file in the current working directory.
xulian_data.to_csv('bigdata.csv')

In [None]:
# Display the test features (label column removed); the result is not stored.
test_data.drop(columns=[label])

In [11]:
# Build the prediction table with five named columns.
# NOTE(review): only 5 column names are given, while features2 as built by the
# np.column_stack cell in this notebook has 17 columns. The recorded output
# (size 5033420) suggests this cell ran against a different `features2` from an
# earlier session — confirm before re-running on a fresh kernel.
xunlian_data = pd.DataFrame(features2,columns=['asp','slope','dem','twi','tpi'])
xunlian_data.size

5033420

In [12]:
# Predict the target for every row of the prediction table.
result_pred = predictor.predict(xunlian_data)

In [15]:
# Read the raster "SY_DEM_fan" and display it.
# NOTE(review): this rebinds `dem_array`, which an earlier cell assigned from
# "SY_DEM_5"; cells that use dem_array give different results depending on
# which assignment ran last.
dem_array =arcpy.RasterToNumPyArray("SY_DEM_fan")
dem_array

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [16]:
# Set the geoprocessing extent from the "SY_DEM_fan" raster, then reshape the
# flat predictions back into the 2-D shape of dem_array.
# np.reshape raises if the number of predictions differs from the cell count.
env.extent = "SY_DEM_fan"
result_array = result_pred.values
raster_array = np.reshape(result_array,dem_array.shape)
raster_array

array([[6.509404, 6.509404, 6.509404, ..., 6.509404, 6.509404, 6.509404],
       [6.509404, 6.509404, 6.509404, ..., 6.509404, 6.509404, 6.509404],
       [6.509404, 6.509404, 6.509404, ..., 6.509404, 6.509404, 6.509404],
       ...,
       [6.509404, 6.509404, 6.509404, ..., 6.509404, 6.509404, 6.509404],
       [6.509404, 6.509404, 6.509404, ..., 6.509404, 6.509404, 6.509404],
       [6.509404, 6.509404, 6.509404, ..., 6.509404, 6.509404, 6.509404]],
      dtype=float32)

In [18]:
# Save the predictions to a CSV file.
result_pred.to_csv("./pred.csv")

In [17]:

# Write the prediction grid out as a raster.
# The point passed as the second argument is built from the XMin/YMin of the
# current environment extent, and the two literal 5s are the x and y cell sizes.
# NOTE(review): the cell size is hard-coded here rather than read from the
# source raster — confirm it matches "SY_DEM_fan".
out_raster = arcpy.NumPyArrayToRaster(
    raster_array,
    arcpy.Point(arcpy.env.extent.XMin, arcpy.env.extent.YMin),
    5,
    5,
)
out_raster.save("RESULT")
print("完成")


完成


In [19]:
# Get coordinates: start by displaying the current dem_array.
dem_array

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [28]:
# Only the last expression of a cell is displayed, so the shape on the first
# line is evaluated but not shown; the output is the top-left cell value.
dem_array.shape
dem_array[0][0]

0.0

In [26]:

# Centre coordinates of the raster cell at (row_index, col_index), measured
# from the upper-left corner (XMin, YMax) of the current environment extent.
row_index, col_index = 0, 0

fan_desc = arcpy.Describe("SY_DEM_fan")
cell_size_x = fan_desc.meanCellWidth
cell_size_y = fan_desc.meanCellHeight

current_extent = arcpy.env.extent
# The +0.5 moves from the cell's corner to its centre.
x_coord = current_extent.XMin + (col_index + 0.5) * cell_size_x
y_coord = current_extent.YMax - (row_index + 0.5) * cell_size_y

print("X坐标:", x_coord)
print("Y坐标:", y_coord)

X坐标: 418865.0
Y坐标: 3108760.0


In [44]:
# Two small zero arrays of different shapes, used to try out resize_arrays.
A = np.zeros((2,2))
B = np.zeros((5,3))

In [46]:
# Resize both arrays to their common shape, filling padded cells with 888.
a, b = resize_arrays(A,B,888)

In [48]:
# Inspect the resized B; it already had the common shape, so no fill value appears.
b

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [49]:
# Inspect the resized A; the padded rows and column hold the fill value 888.
a

array([[  0.,   0., 888.],
       [  0.,   0., 888.],
       [888., 888., 888.],
       [888., 888., 888.],
       [888., 888., 888.]])

In [25]:
# Scratch version of the resize logic: force B to A's shape by zero-padding at
# the bottom/right where B is smaller and cropping where B is larger.
new_shape = (A.shape[0], A.shape[1])

if B.shape != new_shape:
    row_gap = new_shape[0] - B.shape[0]
    if row_gap > 0:
        # B has too few rows: append zero rows below.
        B = np.vstack((B, np.zeros((row_gap, B.shape[1]))))
    elif row_gap < 0:
        # B has too many rows: keep only the leading ones.
        B = B[:new_shape[0], :]

    col_gap = new_shape[1] - B.shape[1]
    if col_gap > 0:
        # B has too few columns: append zero columns on the right.
        B = np.pad(B, ((0, 0), (0, col_gap)), mode='constant', constant_values=0)
    elif col_gap < 0:
        # B has too many columns: keep only the leading ones.
        B = B[:, :new_shape[1]]
