In [29]:
import os
import shutil
from sklearn.metrics import accuracy_score,silhouette_score
from sklearn.cluster import KMeans
import arcpy
import pandas as pd
from sklearn.preprocessing import StandardScaler
from arcpy import env
import numpy as np
from sklearn.cluster import BisectingKMeans,MiniBatchKMeans,kmeans_plusplus

In [7]:
env.workspace = r'D:\GEEDOWNLOAD\allfeatures'
raster_list = arcpy.ListRasters()

['DEM.tif', 'PRE.tif', 'TMPMAX.tif', 'TMPMIN.tif']

In [8]:
# 读取各个特征的array
dem_array = arcpy.RasterToNumPyArray('DEM.tif')
pre_array = arcpy.RasterToNumPyArray('PRE.tif')
tmpmax_array = arcpy.RasterToNumPyArray('TMPMAX.tif')
tmpmin_array = arcpy.RasterToNumPyArray('TMPMIN.tif')

In [9]:
print(dem_array.shape,pre_array.shape,tmpmax_array.shape,tmpmin_array.shape)

(17082, 22104) (17082, 22104) (17082, 22104) (17082, 22104)


In [11]:
shape1 = dem_array.shape

# 随机生成10000个索引
idx = np.unravel_index(np.random.choice(np.prod(shape1), 10000, replace=False), shape1)

In [16]:
dem_values = dem_array[idx]
pre_values = pre_array[idx]
tmpmax_values = tmpmax_array[idx]
tmpmin_values = tmpmin_array[idx]

In [17]:
df = pd.DataFrame({'dem':dem_values,'pre':pre_values,'tmpmax':tmpmax_values,'tmpmin':tmpmin_values})

In [27]:
data = df
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)
# 存储每个k值的WSS
wss = []

# 尝试的k的范围
k_range = range(2, 21)  # 尝试从1到10的k值

In [30]:

for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(data_scaled)
    wss.append(kmeans.inertia_)

In [32]:
kmeans = KMeans(n_clusters=4, random_state=42)
clusters = kmeans.fit_predict(data_scaled)

In [34]:
df['label'] = list(clusters)

In [35]:
df

Unnamed: 0,dem,pre,tmpmax,tmpmin,label
0,974,108,226,141,2
1,463,93,213,141,2
2,922,99,193,114,1
3,1821,85,169,86,3
4,1061,89,192,119,1
...,...,...,...,...,...
9995,643,104,199,127,2
9996,821,90,203,128,2
9997,509,104,213,137,2
9998,1438,92,169,99,3


In [37]:
dem_flatten = dem_array.flatten()
pre_flatten = pre_array.flatten()
tmpmax_flatten = tmpmax_array.flatten()
tmpmin_flatten = tmpmin_array.flatten()

In [40]:
predictor_data = pd.DataFrame({'dem':dem_flatten,'pre':pre_flatten,'tmpmax':tmpmax_flatten,'tmpmin':tmpmin_flatten})

In [42]:
# 标准化

scaler_predictor_data = scaler.fit_transform(predictor_data)
scaler_predictor_data

array([[ 0.18258835,  1.37361909,  0.08858858,  0.11745266],
       [ 0.18258835,  1.37361909,  0.08858858,  0.11745266],
       [ 0.171352  ,  1.37361909,  0.08858858,  0.11745266],
       ...,
       [-1.70699109,  3.73529944,  2.44704659,  2.33259474],
       [-1.70137291,  3.73529944,  2.44704659,  2.33259474],
       [-1.69762746,  3.73529944,  2.44704659,  2.33259474]])

In [43]:
# 预测标签
predictor_value = kmeans.fit_predict(scaler_predictor_data)

In [45]:
predictor_value.size

377580528

In [47]:
# 重塑形状
raster_array = np.reshape(predictor_value,dem_array.shape)
raster_array

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 3, 3, 3],
       [1, 1, 1, ..., 3, 3, 3],
       [1, 1, 1, ..., 3, 3, 3]])

In [48]:
# 空间范围
env.extent = "DEM.tif"

In [55]:
# 使用Describe函数获取栅格的描述
raster_desc = arcpy.Describe('DEM.tif')

# 获取像元宽度和高度
cell_width = raster_desc.meanCellWidth
cell_height = raster_desc.meanCellHeight
cell_width,cell_height

(0.0002694945852358513, 0.0002694945852358511)

In [58]:
# 将预测结果输出为栅格数据
out_raster = arcpy.NumPyArrayToRaster(
    raster_array,
    arcpy.Point(arcpy.env.extent.XMin, arcpy.env.extent.YMin),
    cell_width,
    cell_height
)
out_raster.save("RESULT2.tif")
print("完成")


完成
