In [1]:
import os
import ee
import geemap
# Point both proxy environment variables at the local proxy on 127.0.0.1:7890
# before any Earth Engine request is made.
os.environ.update({
    'HTTP_PROXY': 'http://127.0.0.1:7890',
    'HTTPS_PROXY': 'http://127.0.0.1:7890',
})

# Create the interactive map widget, then run geemap's Earth Engine
# initialization helper.
Map = geemap.Map()
geemap.ee_initialize()

In [3]:
import pandas as pd

In [21]:
def getS2ImageCol(roi, s2, dataRage):
    """Build a cloud-masked, rescaled Sentinel-2 ImageCollection.

    Args:
        roi (object): Region of interest handed to ``filterBounds``.
        s2 (object): Collection identifier handed to ``ee.ImageCollection``
            (the notebook passes an asset ID string).
        dataRage (ee.DateRange): Date range handed to ``filterDate``.

    Returns:
        object: ImageCollection limited to ``roi`` and ``dataRage``. Each
        image is masked with its QA60 band, reduced to the ten selected
        spectral bands, and divided by 10000.
    """
    opaque_cloud_bit = 1 << 10  # bit 10 of QA60
    cirrus_bit = 1 << 11        # bit 11 of QA60
    spectral_bands = ["B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B11", "B12"]

    def _mask_clouds(img):
        """Keep only pixels whose QA60 cloud and cirrus bits are both zero."""
        qa60 = img.select('QA60')
        no_opaque_cloud = qa60.bitwiseAnd(opaque_cloud_bit).eq(0)
        no_cirrus = qa60.bitwiseAnd(cirrus_bit).eq(0)
        return img.updateMask(no_opaque_cloud.And(no_cirrus))

    def _rescale(img):
        """Divide every band value by 10000."""
        return img.divide(10000)

    filtered = ee.ImageCollection(s2).filterBounds(roi).filterDate(dataRage)
    # The mask must be applied before ``select`` because QA60 is not among
    # the bands that are kept.
    cloud_masked = filtered.map(_mask_clouds)
    return cloud_masked.select(spectral_bands).map(_rescale)

In [22]:
def getAlphaEarthImage(roi, year, bands=None, collection=None):
    """Return the important AlphaEarth embedding bands for one year.

    According to the SHAP analysis the important bands are Alpha32, Alpha64,
    Alpha37, Alpha48 and Alpha47. In GEE these are named A31, A63, A36, A47
    and A46, because band numbering starts at A00.

    Args:
        roi (object): Region of interest used for ``filterBounds`` and ``clip``.
        year (int): Calendar year. Anything ``int()`` accepts is allowed
            (e.g. ``"2022"`` or ``2022.0``).
        bands (list, optional): Band names to keep. Defaults to the five
            SHAP-selected bands listed above.
        collection (object, optional): Collection identifier handed to
            ``ee.ImageCollection``. Defaults to the notebook-level
            ``alphaearth_collection`` variable, looked up at call time.

    Returns:
        object: Per-pixel mean of the collection images dated within ``year``
        that intersect ``roi``, reduced to ``bands`` and clipped to ``roi``.

    Raises:
        NameError: If ``collection`` is omitted and ``alphaearth_collection``
            has not been defined yet.
    """
    if bands is None:
        # Alpha32->A31, Alpha64->A63, Alpha37->A36, Alpha48->A47, Alpha47->A46
        bands = ["A31", "A63", "A36", "A47", "A46"]
    if collection is None:
        # Backward-compatible fallback to the notebook's configuration cell.
        collection = alphaearth_collection

    # Normalise first so that "2022" or 2022.0 cannot produce a malformed date.
    year = int(year)
    start_date = f"{year}-01-01"
    end_date = f"{year + 1}-01-01"

    alphaearth_img = (ee.ImageCollection(collection)
                      .filterDate(start_date, end_date)
                      .filterBounds(roi)
                      .mean()
                      .select(bands)
                      .clip(roi))

    return alphaearth_img

In [23]:
year = 2022

# Sentinel-2 compositing window: 1 September through 31 December of `year`.
# The end of an ee.DateRange is exclusive, so the range is closed on
# 1 January of the following year; ending it on '<year>-12-31' would drop
# every image acquired on 31 December.
start = f"{year}-09-01"
end = f"{year + 1}-01-01"
dataRage = ee.DateRange(start, end)

# Sentinel-2 collection ID
s2 = "COPERNICUS/S2_SR_HARMONIZED"

# AlphaEarth dataset - Google Satellite Embedding
alphaearth_collection = "GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL"

In [24]:
# ROI: bounding rectangle of the labelled point collection
points = ee.FeatureCollection("projects/ee-wang/assets/pnt")
roi = points.geometry().bounds()

# Sentinel-2: per-pixel median of the filtered collection, clipped to the ROI.
# Note that despite its name, `s2ImageCol` holds a single composite image here.
s2ImageCol = getS2ImageCol(roi, s2, dataRage).median().clip(roi)

# AlphaEarth: selected embedding bands for the same year
alphaearth_img = getAlphaEarthImage(roi, year)

# Stack both sources into one multi-band image
combined_img = s2ImageCol.addBands(alphaearth_img)



In [26]:
# Square kernel with a radius of 2 pixels, i.e. a 5x5 window; normalization is off
radius = 2
kernel = ee.Kernel.square(radius=radius, units='pixels', normalize=False)

# Convert the stacked image (Sentinel-2 + AlphaEarth) so that each pixel
# carries its 5x5 neighbourhood as an array
img_array = ee.Image(combined_img).neighborhoodToArray(kernel)

In [8]:

points = (
    ee.FeatureCollection("projects/ee-wang/assets/pnt")
    # Step 1: reproducible ~5% subsample (seed 1). Both bounds are combined
    # with the static ee.Filter.And(a, b) form.
    .randomColumn('random', 1)
    .filter(ee.Filter.And(ee.Filter.gt('random', 0), ee.Filter.lt('random', 0.05)))
    .map(lambda fea: fea.set('random', None))
    # Step 2: draw a fresh 'random' column for later use. Per the Earth Engine
    # docs randomColumn is deterministic for a given seed and row key, so
    # reusing seed 1 would regenerate the values from step 1 and leave every
    # value inside (0, 0.05). A different seed gives a new draw.
    .randomColumn('random', 2)
    .select(['landcover', 'random'])
)

print(f"样本大小是{points.size().getInfo()}")
print(points.first().getInfo())


样本大小是12700
{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-15.443030773458025, 64.57640269623263]}, 'id': '0000000000000000002a', 'properties': {'ASPECT': 0, 'B1': 0.838199973, 'B11': 0.040899999, 'B12': 0.0425, 'B2': 0.862600029, 'B3': 0.880999982, 'B4': 0.862999976, 'B5': 0.877699971, 'B6': 0.837100029, 'B7': 0.791599989, 'B8': 0.772800028, 'B8A': 0.734600008, 'B9': 0.745999992, 'CIRE': -0.103564158, 'CIRE_MAX': -0.061351467, 'CIRE_MEAN': -0.112698182, 'CIRE_MIN': -0.174052447, 'DEM': 1297.592041, 'ENTROPY': 4.78386116, 'EVI': -0.354765333, 'EVI_MAX': 5.33256267, 'EVI_MEAN': -0.216274676, 'EVI_MIN': -1.065846403, 'Field1': 42, 'MSK_SNWPRB': 99.99999776, 'MSK_SNWP_1': 0.01, 'MSK_SNWP_2': 0.0002, 'MSK_SNWP_3': 0.00825238, 'NDBI': -0.919416785, 'NDBI_MAX': -0.392683357, 'NDBI_MEAN': -0.855272293, 'NDBI_MIN': -0.978207886, 'NDMI': 91.53472185, 'NDMI_MAX': 0.977087736, 'NDMI_MEAN': 0.853083849, 'NDMI_MIN': 0.458547562, 'NDSI': 92.09432602, 'NDSI_MAX': 0.982986331, 'NDSI

In [34]:
#points = train.merge(test)

In [28]:
# Sample the neighbourhood-array image at every feature in `points`, using scale=90
result = img_array.sampleRegions(collection=points, scale=90)

In [29]:
# Write the sampled table to a CSV file in the current working directory
geemap.ee_to_csv(
    result,
    filename='water_CNN_with_AlphaEarth.csv',
)