In [1]:
import arcpy
from arcpy import env
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
import pickle

In [3]:
# 数组整形
def resize_arrays(A, B, fill_value=0):
    """调整数组形状一致"""
    new_shape = (max(A.shape[0], B.shape[0]), max(A.shape[1], B.shape[1]))

    if A.shape != new_shape:
        if A.shape[0] < new_shape[0]:
            padding_rows = new_shape[0] - A.shape[0]
            padding = np.full((padding_rows, A.shape[1]), fill_value)
            A = np.vstack((A, padding))
        elif A.shape[0] > new_shape[0]:
            A = A[:new_shape[0], :]

        if A.shape[1] < new_shape[1]:
            pad_width = ((0, 0), (0, new_shape[1] - A.shape[1]))
            A = np.pad(A, pad_width, mode='constant', constant_values=fill_value)
        elif A.shape[1] > new_shape[1]:
            A = A[:, :new_shape[1]]
    
    if B.shape != new_shape:
        if B.shape[0] < new_shape[0]:
            padding_rows = new_shape[0] - B.shape[0]
            padding = np.full((padding_rows, B.shape[1]), fill_value)
            B = np.vstack((B, padding))
        elif B.shape[0] > new_shape[0]:
            B = B[:new_shape[0], :]

        if B.shape[1] < new_shape[1]:
            pad_width = ((0, 0), (0, new_shape[1] - B.shape[1]))
            B = np.pad(B, pad_width, mode='constant', constant_values=fill_value)
        elif B.shape[1] > new_shape[1]:
            B = B[:, :new_shape[1]]
    
    return A, B
# 掩膜提取
def mask_raster(array,mask_ele,cell_size):
    out_raster = arcpy.NumPyArrayToRaster(
    array,
    arcpy.Point(arcpy.env.extent.XMin, arcpy.env.extent.YMin),
    cell_size,
    cell_size,
)
    """按掩膜提取栅格,空间参考设定为:CGCS2000_3_Degree_GK_CM_108E"""
    output_coordinate_system = arcpy.Describe(mask_ele).spatialReference
    with arcpy.EnvManager(outputCoordinateSystem=output_coordinate_system,snapRaster=mask_ele, cellSize=mask_ele):
        result_raster = arcpy.sa.ExtractByMask(out_raster, mask_ele, "INSIDE")
        return result_raster
# 数组整形

In [41]:
# 加载模型
modle_path = r"D:\ArcgisData\pred_tl\pred_moudle\rfmodel_test.pkl"
with open(modle_path, 'rb') as f:
    predictor = pickle.load(f)

In [42]:
predictor.feature_importances_

array([0.11452727, 0.07394894, 0.06652614, 0.100425  , 0.08745574,
       0.08328563, 0.07299646, 0.08405847, 0.09352639, 0.10190037,
       0.08690169, 0.0344479 ])

In [4]:
# 设置工作环境
env.workspace = r"D:\ArcgisData\basedata\basetrain_5m.gdb"
arcpy.ListRasters()

['TWI5',
 'TPI201',
 'TPI101',
 'TPI11',
 'TPI3',
 'TMP',
 'SOILQS',
 'SLOP',
 'PRE',
 'NIGTH',
 'NDVI',
 'DEM',
 'CUR',
 'ASP',
 'PLCUR',
 'POCUR',
 'OSJL',
 'DZ',
 'DL',
 'LON',
 'LAT',
 'PH',
 'SC',
 'SOM']

In [44]:
# 过滤所用的特征因子
check_list = ['A', 'DEM', 'TWI5', 'TPI101', 'TMP', 'SLOP', 'PRE', 'NIGTH', 'NDVI',
       'LAT', 'LON', 'DZ', 'DL']
feature_list = [_ for _ in arcpy.ListRasters() if str(_).replace("_","") in check_list ]
feature_list,len(feature_list),len(check_list)

(['DEM',
  'TWI_5',
  'TPI_101',
  'TMP',
  'SLOP',
  'PRE',
  'NIGTH',
  'NDVI',
  'LAT',
  'LON',
  'DZ',
  'DL'],
 12,
 13)

In [7]:
# for one_raster in feature_list:
#     print(one_raster,arcpy.RasterToNumPyArray(one_raster).shape)
for one_raster in arcpy.ListRasters():
    print(one_raster,arcpy.RasterToNumPyArray(one_raster).shape)

TWI5 (14884, 11094)
TPI201 (14884, 11094)
TPI101 (14884, 11094)
TPI11 (14884, 11094)
TPI3 (14884, 11094)
TMP (14884, 11094)
SOILQS (14884, 11094)
SLOP (14884, 11094)
PRE (14884, 11094)
NIGTH (14884, 11094)
NDVI (14884, 11094)
DEM (14884, 11094)
CUR (14884, 11094)
ASP (14884, 11094)
PLCUR (14884, 11094)
POCUR (14884, 11094)
OSJL (14816, 11002)
DZ (14884, 11094)
DL (14815, 11002)
LON (14884, 11094)
LAT (14884, 11094)
PH (14884, 11094)
SC (14817, 11002)
SOM (14886, 11094)
SOM1 (14886, 11094)


In [8]:
dem_array = arcpy.RasterToNumPyArray("DEM")
som_array = arcpy.RasterToNumPyArray("SOM")
# dl_array = arcpy.RasterToNumPyArray("DL")
# dz_array = arcpy.RasterToNumPyArray("DZ")
# osjl_array = arcpy.RasterToNumPyArray("OSJL")
dem_array.shape, som_array.shape

((14884, 11094), (14886, 11094))

In [9]:
# dz_array = resize_arrays(dem_array,dz_array,8)[1]
# dl_array = resize_arrays(dem_array,dz_array,9)[1]
# dz_array.shape,dl_array.shape
# osjl_array = resize_arrays(dem_array,osjl_array,0)[1]
# osjl_array.shape
som_array = resize_arrays(dem_array,som_array,0)[1]
som_array.shape

(14886, 11094)

In [46]:
# 构造array
dem = arcpy.RasterToNumPyArray("DEM").flatten()
twi = arcpy.RasterToNumPyArray("TWI_5").flatten()
tpi201 = arcpy.RasterToNumPyArray("TPI_201").flatten()
tpi101 = arcpy.RasterToNumPyArray("TPI_101").flatten()
tpi11 = arcpy.RasterToNumPyArray("TPI_11").flatten()
tpi3 = arcpy.RasterToNumPyArray("TPI_3").flatten()
tmp = arcpy.RasterToNumPyArray("TMP").flatten()
soilqs = arcpy.RasterToNumPyArray("SOILQS").flatten()
slop = arcpy.RasterToNumPyArray("SLOP").flatten()
pre = arcpy.RasterToNumPyArray("PRE").flatten()
night = arcpy.RasterToNumPyArray("NIGTH").flatten()
ndvi = arcpy.RasterToNumPyArray("NDVI").flatten()
cur = arcpy.RasterToNumPyArray("CUR").flatten()
asp = arcpy.RasterToNumPyArray("ASP").flatten()
plcur = arcpy.RasterToNumPyArray("PLCUR").flatten()
pocur = arcpy.RasterToNumPyArray("POCUR").flatten()
# osjl = osjl_array.flatten()
lat = arcpy.RasterToNumPyArray("LAT").flatten()
lon =  arcpy.RasterToNumPyArray("LON").flatten()
dz = arcpy.RasterToNumPyArray("DZ").flatten()
dl = arcpy.RasterToNumPyArray("DL").flatten()
tri = arcpy.RasterToNumPyArray("TRI").flatten()

In [13]:
np.min(arcpy.RasterToNumPyArray("OSJL"))

0.0

In [7]:
# 构造经纬度信息
desc = arcpy.Describe("DEM")
origin_x = desc.extent.XMin
origin_y = desc.extent.YMax
pixel_width = desc.meanCellWidth
pixel_height = desc.meanCellHeight
print(origin_x,origin_y,pixel_width,pixel_height)

397387.5 3153427.5 5.0 5.0


In [8]:
# 经度
array_x = np.zeros(dem_array.shape, dtype=np.float32)
array_x[:, 0] = 397387.5+(pixel_width/2)
for i in range(1, dem_array.shape[1]):
    array_x[:, i] = array_x[:, i-1] + pixel_width
print(array_x.shape,array_x[0,1],array_x[0,-1])

(14884, 11094) 397395.0 452855.0


In [9]:
# 纬度
array_y = np.zeros(dem_array.shape,dtype=np.float32)
array_y[0] = 3153427.5-(pixel_height/2)
for i in range(1, dem_array.shape[0]):
    array_y[i] = array_y[i-1] - pixel_height
print(array_y.shape,array_y[0][0],array_y[-1][0])

(14884, 11094) 3153425.0 3079010.0


In [28]:
x = array_x.flatten()
y = array_y.flatten()

In [47]:
# features2 = np.column_stack((x,y,asp,dem,dl,ndvi,nigth,pre,slope,soilqs,tmp,tpi11,tpi101,tpi201,tpi3,twi5,dz))
['A', 'DEM', 'TWI5', 'TPI101', 'TMP', 'SLOP', 'PRE', 'NIGTH', 'NDVI',
       'LAT', 'LON', 'DZ', 'DL']
features2 = np.column_stack((dem,twi,tpi101,tmp,slop,pre,night,ndvi,lat,lon,dz,dl))

In [48]:
features2.size

55048428

In [49]:
features2[300000]

array([1.64787500e+03, 3.99651480e+00, 1.37606201e+01, 1.22750000e+02,
       1.55016794e+01, 9.12500000e+02, 1.59999996e-01, 3.24000000e+02,
       3.12418000e+06, 4.80640000e+05, 6.00000000e+00, 6.00000000e+00])

In [50]:
# xulian_data = pd.DataFrame(features2,columns=['X','Y','ASP','DEM','DL','NDVI','NIGHT','PRE','SLOPE','SOILQS','TMP','TPI11','TPI101','TPI201','TPI3','TWI5','DZ'])

xulian_data = pd.DataFrame(features2,columns=['DEM', 'TWI5', 'TPI101', 'TMP', 'SLOP', 'PRE', 'NIGTH', 'NDVI',
       'LAT', 'LON', 'DZ', 'DL'])

In [51]:
xulian_data.shape

(4587369, 12)

In [52]:
xulian_data.dtypes

DEM       float64
TWI5      float64
TPI101    float64
TMP       float64
SLOP      float64
PRE       float64
NIGTH     float64
NDVI      float64
LAT       float64
LON       float64
DZ        float64
DL        float64
dtype: object

In [53]:
xulian_data['DL'] = xulian_data['DL'].astype(str)
xulian_data['DZ'] = xulian_data['DZ'].astype(str)

In [54]:
xulian_data.dtypes

DEM       float64
TWI5      float64
TPI101    float64
TMP       float64
SLOP      float64
PRE       float64
NIGTH     float64
NDVI      float64
LAT       float64
LON       float64
DZ         object
DL         object
dtype: object

In [55]:
xulian_data.describe()

Unnamed: 0,DEM,TWI5,TPI101,TMP,SLOP,PRE,NIGTH,NDVI,LAT,LON
count,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0,4587369.0
mean,658.6016,2.957591,0.02291479,91.0307,16.41409,517.572,0.2461364,1472.668,1801334.0,331421.7
std,545.9478,2.687172,23.67683,72.24962,17.07998,408.3257,1.025573,2307.893,1422068.0,268567.7
min,0.0,-0.04508924,-239.5056,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,-5.968567,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,866.0,3.428443,0.0,136.9167,13.08498,821.1667,0.22,183.0,2814400.0,457960.0
75%,1110.025,4.885564,5.534058,152.25,29.74471,839.9167,0.28,2311.0,2946160.0,552100.0
max,1806.2,25.08574,180.8566,169.6667,84.59243,932.6667,44.69,9773.0,3152800.0,729220.0


In [56]:
import os
out_path = r"D:\ArcgisData\pred_tl\pred_table\cut_csv"
chunk_size = 400000
total_rows = xulian_data.shape[0]
for i in range(0, total_rows, chunk_size):
    start = i
    end = min(i + chunk_size, total_rows)
    filename =  os.path.join(out_path,f'data_chunk_{i}.csv') # 文件名格式可以根据您的需要进行修改
    df_chunk = xulian_data.iloc[start:end]
    df_chunk.to_csv(filename, index=False)
    print(i)



0
400000
800000
1200000
1600000
2000000
2400000
2800000
3200000
3600000
4000000
4400000


In [57]:
# 读取特征表
table_list = [os.path.join(out_path,_) for _ in os.listdir(out_path)]
table_list,len(table_list)

(['D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_0.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_1200000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_1600000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_2000000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_2400000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_2800000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_3200000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_3600000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_400000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_4000000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_4400000.csv',
  'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_800000.csv'],
 12)

In [58]:
# 排序列表

# 使用lambda函数将文件名按照最后一个下划线后面的数字大小进行排序
sorted_files = sorted(table_list, key=lambda x: int(x.rsplit('_', 1)[-1].split('.')[0]))
sorted_files

['D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_0.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_400000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_800000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_1200000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_1600000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_2000000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_2400000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_2800000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_3200000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_3600000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_4000000.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\cut_csv\\data_chunk_4400000.csv']

In [59]:
# 预测数据存储位置
result_path = r"D:\ArcgisData\pred_tl\pred_table\pre_csv"

In [60]:
# rf tabular prediction
n = 0
for one_table in sorted_files:
    data_df = pd.read_csv(one_table)
    temp_pred = predictor.predict(data_df)
    temp_pred = pd.DataFrame(temp_pred,columns=['A'])
    temp_pred.to_csv(os.path.join(result_path,f"{n}.csv"))
    n+=1
    print(n)

1
2
3
4
5
6
7
8
9
10
11
12


In [61]:
# autogluon tabular prediction
n = 0
for one_table in sorted_files:
    data_df = pd.read_csv(one_table)
    temp_pred = predictor.predict(data_df)
    temp_pred.to_csv(os.path.join(result_path,f"{n}.csv"))
    n+=1
    print(n)

AttributeError: 'numpy.ndarray' object has no attribute 'to_csv'

In [62]:
# 读取预测结果
pre_csv_list = [os.path.join(result_path,_) for _ in os.listdir(result_path)]
pre_csv_list = sorted(pre_csv_list,key=lambda x:int(x.rsplit('\\', -1)[-1].split('.')[0]))
pre_csv_list

['D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\0.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\1.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\2.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\3.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\4.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\5.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\6.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\7.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\8.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\9.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\10.csv',
 'D:\\ArcgisData\\pred_tl\\pred_table\\pre_csv\\11.csv']

In [63]:
pre_df = pd.read_csv(pre_csv_list[0])
for one_pred in pre_csv_list[1:]:
    temp_df = pd.read_csv(one_pred)
    pre_df = pd.concat([pre_df,temp_df],axis=0)
    print(one_pred)

D:\ArcgisData\pred_tl\pred_table\pre_csv\1.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\2.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\3.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\4.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\5.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\6.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\7.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\8.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\9.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\10.csv
D:\ArcgisData\pred_tl\pred_table\pre_csv\11.csv


In [64]:
# 保存完整的预测数据
pre_df.to_csv(os.path.join(r"D:\ArcgisData\pred_tl\pred_table\merge_csv","result.csv"))

In [65]:
pre_df.size,len(pre_df)

(9174738, 4587369)

In [66]:
pre_df.columns

Index(['Unnamed: 0', 'A'], dtype='object')

In [67]:
pre_df['category_encoded'] = pd.factorize(pre_df['A'])[0]

In [68]:
pre_df['category_encoded'] = pre_df['category_encoded'].astype('float32')
pre_df.dtypes

Unnamed: 0            int64
A                     int64
category_encoded    float32
dtype: object

In [69]:
raster_array = np.reshape(pre_df['category_encoded'].values,arcpy.RasterToNumPyArray("DEM").shape)
raster_array

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [13]:
# 
env.extent = "DEM"

In [11]:
# 掩膜提取
def mask_raster(array,mask_ele,cell_size):
    out_raster = arcpy.NumPyArrayToRaster(
    array,
    arcpy.Point(arcpy.env.extent.XMin, arcpy.env.extent.YMin),
    cell_size,
    cell_size,
)
    """按掩膜提取栅格,空间参考设定为:CGCS2000_3_Degree_GK_CM_108E"""
    output_coordinate_system = arcpy.Describe(mask_ele).spatialReference
    with arcpy.EnvManager(outputCoordinateSystem=output_coordinate_system,snapRaster=mask_ele, cellSize=mask_ele):
        result_raster = arcpy.sa.ExtractByMask(out_raster, mask_ele, "INSIDE")
        return result_raster

In [15]:
# 按掩膜处理
# result_path = r"D:\ArcgisData\pred_tl\pred_database\TL_basedata.gdb"
result_raster = mask_raster(array_y,"DEM", 5)
result_raster.save("LAT")
print("完成")

完成
