In [10]:
import arcpy
from arcpy import env
import pandas as pd
import numpy as np
import os

In [2]:
# Array reshaping helpers
def _pad_to_shape(arr, target_shape, fill_value):
    """Pad a 2-D array on its bottom and right edges up to ``target_shape``.

    The array is returned unchanged (same object) when it already has the
    requested number of rows and columns.
    """
    extra_rows = target_shape[0] - arr.shape[0]
    extra_cols = target_shape[1] - arr.shape[1]
    if extra_rows == 0 and extra_cols == 0:
        return arr
    # A single np.pad call handles both axes and keeps the input dtype.
    return np.pad(
        arr,
        ((0, extra_rows), (0, extra_cols)),
        mode='constant',
        constant_values=fill_value,
    )


def resize_arrays(A, B, fill_value=0):
    """Pad two 2-D arrays so that both end up with the same shape.

    The common shape is the element-wise maximum of the two input shapes, so
    neither array is ever truncated; missing rows are appended at the bottom
    and missing columns on the right, filled with ``fill_value``.

    Parameters
    ----------
    A, B : numpy.ndarray
        Two-dimensional input arrays.
    fill_value : scalar, optional
        Value written into the padded cells (default 0). It must be
        representable in the dtype of the array being padded.

    Returns
    -------
    tuple of numpy.ndarray
        ``(A, B)`` resized to the common shape. Each output keeps the dtype of
        its input; an input that already has the common shape is returned as
        the same object, without copying.
    """
    new_shape = (max(A.shape[0], B.shape[0]), max(A.shape[1], B.shape[1]))
    return (
        _pad_to_shape(A, new_shape, fill_value),
        _pad_to_shape(B, new_shape, fill_value),
    )


In [3]:
# Set up the working environment: point arcpy at the file geodatabase,
# then list the rasters found in that workspace.
arcpy.env.workspace = r"D:\ArcGISProjects\workspace\shbyq\feature_raster_file\features_data_ky.gdb"
arcpy.ListRasters()

['PRE',
 'SRA',
 'TMP',
 'VAP',
 'WIND',
 'BIO',
 'Contrast',
 'Correlation',
 'Dissimilarity',
 'Entropy',
 'Homogeneity',
 'Mean',
 'ndvi',
 'PCA_0',
 'PCA_1',
 'SecondMoment',
 'Variance',
 'LON',
 'LAT',
 'Aspect',
 'ChannelNetworkBaseLevel',
 'ChannelNetworkDistance',
 'ClosedDepressions',
 'ConvergenceIndex',
 'LSFactor',
 'PlanCurvature',
 'ProfileCurvature',
 'RelativeSlopePosition',
 'Slope',
 'TopographicPositionIndex',
 'TopographicWetnessIndex',
 'TotalCatchmentArea',
 'ValleyDepth',
 'DEM',
 'AnalyticalHillshading']

In [4]:
# Alternative hand-picked feature subsets, kept for reference (disabled):
# feature_list = ['BIO', 'PRE', 'SRA', 'TMP', 'VAP', 'WIN', 'NDVI', 'TDQS', 'LIGHT', 'AnalyticalHillshading', 'Aspect', 'ChannelNetworkBaseLevel', 'ChannelNetworkDistance', 'ClosedDepressions', 'ConvergenceIndex', 'LSFactor', 'PlanCurvature', 'ProfileCurvature', 'RelativeSlopePosition', 'Slope', 'TopographicWetnessIndex', 'TotalCatchmentArea', 'ValleyDepth', 'DEM', 'LON', 'LAT']
# feature_list = ['DEM', 'AnalyticalHillshading', 'ChannelNetworkDistance', 'PlanCurvature', 'ProfileCurvature', 'RelativeSlopePosition', 'Slope', 'ValleyDepth', 'PCA_0', 'PCA_1', 'PRE', 'SRA', 'BIO', 'LON', 'LAT']
# Use every raster listed in the current arcpy workspace.
feature_list = arcpy.ListRasters()
# Convert each raster to a NumPy array, keyed by its raster name.
feature_numpyarray_dict = {
    raster_name: arcpy.RasterToNumPyArray(raster_name)
    for raster_name in feature_list
}

In [5]:
# Check that the shapes agree: print the number of distinct array shapes
# (1 means every raster array has the same shape).
print(len({raster_array.shape for raster_array in feature_numpyarray_dict.values()}))

1


In [6]:
# Assemble the feature table: each raster array becomes one 1-D column, and
# the columns are stacked side by side in dict order.
# ravel() is used instead of flatten(): flatten() always allocates a full copy
# of every raster, and column_stack copies the data into the result anyway.
features_table = np.column_stack(
    [raster_array.ravel() for raster_array in feature_numpyarray_dict.values()]
)

In [7]:
# Total number of elements in the feature table (rows x columns).
features_table.size

2578067745

In [8]:
# Dimensions of the feature table as (rows, columns).
features_table.shape

(171871183, 15)

In [9]:
# Add column names: wrap the feature table in a DataFrame whose columns are
# labelled with the entries of feature_list.
data = pd.DataFrame(features_table,columns = feature_list)

In [10]:
# Save the feature table as a series of CSV files, chunk_size rows per file.
out_path = r"D:\ArcGISProjects\workspace\pred_feature_table\dy"
chunk_size = 500000
total_rows = data.shape[0]
# Create the destination folder up front so to_csv does not fail when the
# directory is missing; exist_ok makes re-running this cell harmless.
os.makedirs(out_path, exist_ok=True)
for i in range(0, total_rows, chunk_size):
    # i is the row offset of the chunk; it is also used in the file name
    # (the file name format can be adjusted as needed).
    filename = os.path.join(out_path, f'data_chunk_{i}.csv')
    # iloc slicing stops at the end of the frame, so the last chunk may be
    # shorter than chunk_size.
    data.iloc[i:i + chunk_size].to_csv(filename, index=False)
    print(i)

0
500000
1000000
1500000
2000000
2500000
3000000
3500000
4000000
4500000
5000000
5500000
6000000
6500000
7000000
7500000
8000000
8500000
9000000
9500000
10000000
10500000
11000000
11500000
12000000
12500000
13000000
13500000
14000000
14500000
15000000
15500000
16000000
16500000
17000000
17500000
18000000
18500000
19000000
19500000
20000000
20500000
21000000
21500000
22000000
22500000
23000000
23500000
24000000
24500000
25000000
25500000
26000000
26500000
27000000
27500000
28000000
28500000
29000000
29500000
30000000
30500000
31000000
31500000
32000000
32500000
33000000
33500000
34000000
34500000
35000000
35500000
36000000
36500000
37000000
37500000
38000000
38500000
39000000
39500000
40000000
40500000
41000000
41500000
42000000
42500000
43000000
43500000
44000000
44500000
45000000
45500000
46000000
46500000
47000000
47500000
48000000
48500000
49000000
49500000
50000000
50500000
51000000
51500000
52000000
52500000
53000000
53500000
54000000
54500000
55000000
55500000
56000000
56500000
5

In [6]:
# Output directory for the exported CSV chunks.
# Note: this only assigns the path string; the directory itself is not created here.
out_path = r"F:\cache_data\pred_feature_table\ky\feature_table\all"


In [7]:
# One 1-D array per raster, in dict order.
# ravel() returns a view where possible, whereas flatten() always allocates a
# full copy of every raster; the values are identical either way.
flatten_list = [raster_array.ravel() for raster_array in feature_numpyarray_dict.values()]

In [11]:
# Number of rows written to each CSV file.
chunk_size = 500000

# Number of features (one per raster array).
num_features = len(feature_numpyarray_dict)

# Number of rows = number of cells in one raster (all rasters are assumed to
# have the same size; the first one is used). .size reads the element count
# directly instead of building a flattened copy just to call len() on it.
num_rows = list(feature_numpyarray_dict.values())[0].size

# Number of chunks needed, rounded up so a final partial chunk is included.
num_chunks = (num_rows + chunk_size - 1) // chunk_size
print(num_chunks)

# Make sure the destination folder exists before writing any file.
os.makedirs(out_path, exist_ok=True)

# Build and export the table one chunk at a time.
for i in range(num_chunks):
    start_idx = i * chunk_size
    end_idx = min(start_idx + chunk_size, num_rows)

    # Slice the same row range out of every flattened raster and place the
    # slices side by side as columns.
    chunk_data = np.column_stack([column[start_idx:end_idx] for column in flatten_list])
    # Convert to a pandas DataFrame with the raster names as column labels.
    data = pd.DataFrame(chunk_data, columns=feature_list)
    # NOTE(review): the DataFrame index is written as an extra first column
    # (pandas default) and restarts at 0 in every chunk; the other CSV export
    # in this notebook passes index=False. Confirm which format readers expect.
    data.to_csv(os.path.join(out_path, f'data_chunk_{str(i).zfill(3)}.csv'))
    print(i)

335
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
27