In [1]:
# 初始化设置 - 只需运行一次
import os
import ee
import geemap

# 设置代理
os.environ['HTTP_PROXY'] = 'http://127.0.0.1:7890'
os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:7890'

# 初始化GEE
geemap.ee_initialize()

# 定义函数
def getS2ImageCol(roi, s2, dataRage):
    def rmCloudByQA(image):
        qa = image.select('QA60')
        cloudBitMask = 1 << 10
        cirrusBitMask = 1 << 11
        mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(qa.bitwiseAnd(cirrusBitMask).eq(0))
        return image.updateMask(mask)
    def reScale(image):
        return image.divide(10000)
    s2Col = (ee.ImageCollection(s2).filterBounds(roi).filterDate(dataRage)
             .map(rmCloudByQA).select(["B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B11", "B12"])
             .map(reScale))
    return s2Col

def getAlphaEarthImage(roi, year):
    important_bands = ["A31", "A63", "A36", "A47", "A46"]
    start_date = str(year) + '-01-01'
    end_date = str(year + 1) + '-01-01'
    alphaearth_img = (ee.ImageCollection(alphaearth_collection)
                      .filterDate(start_date, end_date).filterBounds(roi)
                      .mean().select(important_bands).clip(roi))
    return alphaearth_img

# 设置基本参数
year = 2022
start = str(year)+'-09-01'
end = str(year)+'-12-31'
dataRage = ee.DateRange(start, end)
s2 = "COPERNICUS/S2_SR_HARMONIZED"
alphaearth_collection = "GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL"

# 定义ROI和数据
points_collection = ee.FeatureCollection("projects/ee-wang/assets/pnt")
roi = points_collection.geometry().bounds()
s2ImageCol = getS2ImageCol(roi, s2, dataRage).median().clip(roi)
alphaearth_img = getAlphaEarthImage(roi, year)
combined_img = s2ImageCol.addBands(alphaearth_img)
radius = 2
kernel = ee.Kernel.square(radius, 'pixels', False)
img_array = ee.Image(combined_img).neighborhoodToArray(kernel)

# 为所有点添加索引号用于分组
def addIndex(feature):
    return feature.set('index', ee.Number(feature.get('system:index')).format('%05d'))

all_points = points_collection.randomColumn('random_seed', 42).sort('random_seed')
all_points_list = all_points.toList(13000)  # 转换为列表以便索引访问
print('✅ 初始化完成，可以开始运行下面的代码块')

✅ 初始化完成，可以开始运行下面的代码块


In [4]:
# 导出第1个文件 (样本 0-999)
print('正在导出第1个文件...')
points_1 = ee.FeatureCollection(all_points_list.slice(500, 1000)).select(['landcover']).randomColumn('random', 1)
result_1 = img_array.sampleRegions(collection=points_1, scale=90)
geemap.ee_to_csv(result_1, filename='data/water_CNN_with_AlphaEarth1.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth1_1.csv')

正在导出第1个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth1_1.csv


In [7]:
# 导出第2个文件 (样本 1000-1999)
print('正在导出第2个文件...')
points_2 = ee.FeatureCollection(all_points_list.slice(1000, 2000)).select(['landcover']).randomColumn('random', 1)
result_2 = img_array.sampleRegions(collection=points_2, scale=90)
geemap.ee_to_csv(result_2, filename='data/water_CNN_with_AlphaEarth2.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth2.csv')

正在导出第2个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth2.csv


In [4]:
# 导出第3个文件 (样本 2000-2999)
print('正在导出第3个文件...')
points_3 = ee.FeatureCollection(all_points_list.slice(2500, 3000)).select(['landcover']).randomColumn('random', 1)
result_3 = img_array.sampleRegions(collection=points_3, scale=90)
geemap.ee_to_csv(result_3, filename='data/water_CNN_with_AlphaEarth3.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth3_1.csv')

正在导出第3个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth3_1.csv


In [9]:
# 导出第4个文件 (样本 3000-3999)
print('正在导出第4个文件...')
points_4 = ee.FeatureCollection(all_points_list.slice(3000, 4000)).select(['landcover']).randomColumn('random', 1)
result_4 = img_array.sampleRegions(collection=points_4, scale=90)
geemap.ee_to_csv(result_4, filename='data/water_CNN_with_AlphaEarth4.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth4.csv')

正在导出第4个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth4.csv


In [6]:
# 导出第5个文件 (样本 4000-4999)
print('正在导出第5个文件...')
points_5 = ee.FeatureCollection(all_points_list.slice(4500, 5000)).select(['landcover']).randomColumn('random', 1)
result_5 = img_array.sampleRegions(collection=points_5, scale=90)
geemap.ee_to_csv(result_5, filename='data/water_CNN_with_AlphaEarth5.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth5_1.csv')

正在导出第5个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth5_1.csv


In [9]:
# 导出第6个文件 (样本 5000-5999)
print('正在导出第6个文件...')
points_6 = ee.FeatureCollection(all_points_list.slice(5500, 6000)).select(['landcover']).randomColumn('random', 1)
result_6 = img_array.sampleRegions(collection=points_6, scale=90)
geemap.ee_to_csv(result_6, filename='data/water_CNN_with_AlphaEarth6.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth6_1.csv')

正在导出第6个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth6_1.csv


In [12]:
# 导出第7个文件 (样本 6000-6999)
print('正在导出第7个文件...')
points_7 = ee.FeatureCollection(all_points_list.slice(6500, 7000)).select(['landcover']).randomColumn('random', 1)
result_7 = img_array.sampleRegions(collection=points_7, scale=90)
geemap.ee_to_csv(result_7, filename='data/water_CNN_with_AlphaEarth7.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth7_1.csv')

正在导出第7个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth7_1.csv


In [13]:
# 导出第8个文件 (样本 7000-7999)
print('正在导出第8个文件...')
points_8 = ee.FeatureCollection(all_points_list.slice(7000, 8000)).select(['landcover']).randomColumn('random', 1)
result_8 = img_array.sampleRegions(collection=points_8, scale=90)
geemap.ee_to_csv(result_8, filename='data/water_CNN_with_AlphaEarth8.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth8.csv')

正在导出第8个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth8.csv


In [8]:
# 导出第9个文件 (样本 8000-8999)
print('正在导出第9个文件...')
points_9 = ee.FeatureCollection(all_points_list.slice(8800, 9000)).select(['landcover']).randomColumn('random', 1)
result_9 = img_array.sampleRegions(collection=points_9, scale=90)
geemap.ee_to_csv(result_9, filename='data/water_CNN_with_AlphaEarth9_4.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth9.csv')

正在导出第9个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth9.csv


In [13]:
# 导出第10个文件 (样本 9000-9999)
print('正在导出第10个文件...')
points_10 = ee.FeatureCollection(all_points_list.slice(9600, 10000)).select(['landcover']).randomColumn('random', 1)
result_10 = img_array.sampleRegions(collection=points_10, scale=90)
geemap.ee_to_csv(result_10, filename='data/water_CNN_with_AlphaEarth10_2.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth10.csv')

正在导出第10个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth10.csv


In [19]:
# 导出第11个文件 (样本 10000-10999)
print('正在导出第11个文件...')
points_11 = ee.FeatureCollection(all_points_list.slice(10600, 11000)).select(['landcover']).randomColumn('random', 1)
result_11 = img_array.sampleRegions(collection=points_11, scale=90)
geemap.ee_to_csv(result_11, filename='data/water_CNN_with_AlphaEarth11_2.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth11.csv')

正在导出第11个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth11.csv


In [20]:
# 导出第12个文件 (样本 11000-11999)
print('正在导出第12个文件...')
points_12 = ee.FeatureCollection(all_points_list.slice(11000, 12000)).select(['landcover']).randomColumn('random', 1)
result_12 = img_array.sampleRegions(collection=points_12, scale=90)
geemap.ee_to_csv(result_12, filename='data/water_CNN_with_AlphaEarth12.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth12.csv')

正在导出第12个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth12.csv


In [22]:
# 导出第13个文件 (样本 12000-12699，剩余700个样本)
print('正在导出第13个文件...')
points_13 = ee.FeatureCollection(all_points_list.slice(12300, 12700)).select(['landcover']).randomColumn('random', 1)
result_13 = img_array.sampleRegions(collection=points_13, scale=90)
geemap.ee_to_csv(result_13, filename='data/water_CNN_with_AlphaEarth13.csv')
print('✅ 成功导出: data/water_CNN_with_AlphaEarth13.csv')
print('🎉 所有文件导出完成！总共12700个样本')

正在导出第13个文件...
✅ 成功导出: data/water_CNN_with_AlphaEarth13.csv
🎉 所有文件导出完成！总共12700个样本
