In [1]:
import ee

try:
    ee.Initialize(project='gee-satellite-data-456115')
    print("Earth Engine     initialization successful!")
except Exception as e:
    print("Initialization failed, need authentication:", str(e))
    # authentication process
    ee.Authenticate()
    # authentication successful, then initialize again
    ee.Initialize(project='gee-satellite-data-456115')
    print("Earth Engine initialization successful!")

Earth Engine     initialization successful!


In [2]:
import ee
import geemap
import os
import datetime
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [13]:
# use local shapefile to define the study area
def get_roi_from_shapefile(shapefile_path):
    roi_asset = geemap.shp_to_ee(shapefile_path)
    if isinstance(roi_asset, ee.FeatureCollection):
        roi_geometry = roi_asset.geometry()
    else:
        roi_geometry = roi_asset
    return roi_geometry

# load the study area boundary
shapefile_path = 'shapefile/GeldersePoort_cliped.shp'
roi = get_roi_from_shapefile(shapefile_path)

## Get landsat data (1993-2015)

In [14]:
# 1. landsat data collection (1993-2015)
def get_landsat_collection(start_year, end_year):
    """get the Landsat data collection"""
    
    print(f"get the Landsat data: {start_year}-{min(end_year, 2015)}")
    
    # Landsat 5 (1993-2011)
    l5 = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2') \
        .filterDate(f'{max(1993, start_year)}-01-01', '2011-12-31') \
        .filterBounds(roi) \
        .filter(ee.Filter.lt('CLOUD_COVER', 20))
    
    # Landsat 7 (1999-2013)
    l7 = ee.ImageCollection('LANDSAT/LE07/C02/T1_L2') \
        .filterDate(f'{max(1999, start_year)}-01-01', '2013-12-31') \
        .filterBounds(roi) \
        .filter(ee.Filter.lt('CLOUD_COVER', 20))
    
    # Landsat 8 (2013-2015)
    l8 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
        .filterDate('2013-01-01', f'{min(end_year, 2015)}-12-31') \
        .filterBounds(roi) \
        .filter(ee.Filter.lt('CLOUD_COVER', 20))
    
    # get the amount of images
    l5_count = l5.size().getInfo()
    l7_count = l7.size().getInfo()
    l8_count = l8.size().getInfo()
    
    print(f"Landsat 5 images amount: {l5_count}")
    print(f"Landsat 7 images amount: {l7_count}")
    print(f"Landsat 8 images amount: {l8_count}")

    # define the preprocess function (Landsat 5/7)
    def preprocess_l57(image):
        # create the cloud mask based on the QA band
        qa = image.select('QA_PIXEL')
        cloud_mask = qa.bitwiseAnd(1 << 3).eq(0) 
        # add the time information
        date = ee.Date(image.get('system:time_start'))
        year = date.get('year')
        month = date.get('month')
        day = date.get('day')
        
        # clip to the study area and apply the cloud mask
        return image.updateMask(cloud_mask).clip(roi) \
            .set('year', year) \
            .set('month', month) \
            .set('day', day) \
            .set('date', date.format('YYYY-MM-dd'))

    # define the preprocess function (Landsat 8)
    def preprocess_l8(image):
        # create the cloud mask based on the QA band
        qa = image.select('QA_PIXEL')
        cloud_mask = qa.bitwiseAnd(1 << 3).eq(0) 
        # add the time information
        date = ee.Date(image.get('system:time_start'))
        year = date.get('year')
        month = date.get('month')
        day = date.get('day')
        
        # clip to the study area and apply the cloud mask
        return image.updateMask(cloud_mask).clip(roi) \
            .set('year', year) \
            .set('month', month) \
            .set('day', day) \
            .set('date', date.format('YYYY-MM-dd'))

    # process each collection
    print("processing the Landsat images...")
    l5_processed = l5.map(preprocess_l57)
    l7_processed = l7.map(preprocess_l57)
    l8_processed = l8.map(preprocess_l8)

    # merge the collections
    merged = ee.ImageCollection(l5_processed.merge(l7_processed).merge(l8_processed))
    total_count = merged.size().getInfo()
    print(f"Landsat data processing completed, {total_count} images")

    return merged

## Get sentinel2 data (2015-2023)

In [15]:
# 2. 获取Sentinel-2数据（2015-2023）
def get_sentinel2_collection(start_year, end_year):
    """get the Sentinel-2 data collection"""
    
    # 确保年份在有效范围内 (2015-2023)
    start_year = max(2015, start_year)
    end_year = min(2023, end_year)
    
    print(f"get the Sentinel-2 data: {start_year}-{end_year}")
    
    # 获取Sentinel-2地表反射率数据
    s2 = ee.ImageCollection('COPERNICUS/S2_SR') \
        .filterDate(f'{start_year}-01-01', f'{end_year}-12-31') \
        .filterBounds(roi) \
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
    
    s2_count = s2.size().getInfo()
    print(f"Sentinel-2 images amount: {s2_count}")
    
    # 预处理函数
    def preprocess_s2(image):
        # create the cloud mask based on the SCL band
        scl = image.select('SCL')
        valid_mask = scl.gte(4).And(scl.lte(6))

        # add the time information
        date = ee.Date(image.get('system:time_start'))
        year = date.get('year')
        month = date.get('month')
        day = date.get('day')
        
        # clip to the study area and apply the cloud mask
        return image.updateMask(valid_mask).clip(roi) \
            .set('year', year) \
            .set('month', month) \
            .set('day', day) \
            .set('date', date.format('YYYY-MM-dd'))
    
    print("processing Sentinel-2 images...")
    processed = s2.map(preprocess_s2)
    print(f"Sentinel-2 data processing completed, {processed.size().getInfo()} images")
    
    return processed

In [16]:
# 3. create the temporal composite products of different temporal scales
# def create_time_composites(collection, start_year, end_year, temporal_scale='month'):
#     """
#     创建不同时间尺度的影像合成产品
    
#     该函数可以根据指定的时间尺度，从卫星影像集合中创建合成产品。
#     可以是月度、季节或年度尺度的合成。
#     """
#     print(f"create the composite products of {temporal_scale} scale...")
#     composites = []
    
#     # 获取当前时间作为比较基准
#     current_time = ee.Date(datetime.datetime.now().strftime('%Y-%m-%d'))
    
#     if temporal_scale == 'month':
#         for year in range(start_year, end_year + 1):
#             for month in range(1, 13):
#                 start_date = ee.Date.fromYMD(year, month, 1)
#                 end_date = start_date.advance(1, 'month')
                
#                 # 提取当月数据
#                 filtered = collection.filterDate(start_date, end_date)
                
#                 # 跳过没有数据的月份
#                 if filtered.size().getInfo() > 0:
#                     # 计算月度中值合成
#                     monthly = filtered.select('NDVI') \
#                         .median() \
#                         .set({
#                             'system:time_start': start_date.millis(),
#                             'year': year,
#                             'month': month,
#                             'temporal_scale': 'monthly'
#                         })
                    
#                     composites.append(monthly)
    
#     elif temporal_scale == 'season':
#         # 定义季节 (北半球)
#         seasons = {
#             'spring': [3, 4, 5],     # 春季: 3-5月
#             'summer': [6, 7, 8],     # 夏季: 6-8月
#             'autumn': [9, 10, 11],   # 秋季: 9-11月
#             'winter': [12, 1, 2]     # 冬季: 12-2月(跨年)
#         }
        
#         for year in range(start_year, end_year + 1):
#             for season_name, months in seasons.items():
#                 if season_name == 'winter':
#                     # 冬季跨年处理
#                     if year < end_year:
#                         start_date = ee.Date.fromYMD(year, 12, 1)
#                         end_date = ee.Date.fromYMD(year + 1, 3, 1)
#                 else:
#                     # 其他季节
#                     start_date = ee.Date.fromYMD(year, min(months), 1)
#                     end_date = ee.Date.fromYMD(year, max(months), 1).advance(1, 'month')
                
#                 # 跳过未来的季节
#                 if end_date.millis().getInfo() <= current_time.millis().getInfo():
#                     # 提取数据
#                     filtered = collection.filterDate(start_date, end_date)
                    
#                     # 跳过没有数据的季节
#                     if filtered.size().getInfo() > 0:
#                         # 季节中值合成
#                         seasonal = filtered.select('NDVI') \
#                             .median() \
#                             .set({
#                                 'system:time_start': start_date.millis(),
#                                 'year': year,
#                                 'season': season_name,
#                                 'temporal_scale': 'seasonal'
#                             })
                        
#                         composites.append(seasonal)
    
#     elif temporal_scale == 'year':
#         for year in range(start_year, end_year + 1):
#             # 生长季最大值合成 (5月到9月)
#             growing_start = ee.Date.fromYMD(year, 5, 1)
#             growing_end = ee.Date.fromYMD(year, 9, 30)
            
#             # 跳过未来的年份
#             if growing_end.millis().getInfo() <= current_time.millis().getInfo():
#                 # 提取生长季数据
#                 filtered = collection.filterDate(growing_start, growing_end)
                
#                 # 跳过没有数据的年份
#                 if filtered.size().getInfo() > 0:
#                     # 年度最大值合成
#                     yearly = filtered.select('NDVI') \
#                         .max() \
#                         .set({
#                             'system:time_start': ee.Date.fromYMD(year, 1, 1).millis(),
#                             'year': year,
#                             'temporal_scale': 'yearly'
#                         })
                    
#                     composites.append(yearly)
    
#     # 转换为影像集合
#     result = ee.ImageCollection.fromImages(composites)
#     print(f"completed, {result.size().getInfo()} composite products created")
#     return result

In [22]:
# # 4. 提取干扰事件数据 (暂时还未定洪水和干旱事件的时间)
# def extract_disturbance_events_fixed():
#     """提取2018年干旱和洪水事件的NDVI数据（修复版本）"""
    
#     print("提取2018年干旱和洪水事件数据...")
    
#     # 获取2018年Sentinel-2数据 (10m分辨率)
#     s2_2018 = get_sentinel2_collection(2018, 2018)
    
#     # 获取2017年底到2018年初的数据 (用于洪水事件)
#     s2_winter = ee.ImageCollection(
#         get_sentinel2_collection(2017, 2017).merge(
#         get_sentinel2_collection(2018, 2018))
#     ).filterDate('2017-12-01', '2018-02-28')
    
#     print("处理干旱事件数据...")
#     # 干旱事件提取 (2018年夏季)
#     drought_before = s2_2018.filterDate('2018-05-01', '2018-06-30').select('NDVI').median().clip(roi)
#     drought_during = s2_2018.filterDate('2018-07-01', '2018-08-31').select('NDVI').median().clip(roi)
#     drought_after = s2_2018.filterDate('2018-09-01', '2018-10-31').select('NDVI').median().clip(roi)
    
#     print("处理洪水事件数据...")
#     # 洪水事件提取 (2018年1月)
#     # 确保选择NDVI波段并处理潜在的空集合
#     flood_collection_before = s2_winter.filterDate('2017-12-20', '2018-01-04')
#     flood_collection_during = s2_winter.filterDate('2018-01-05', '2018-01-15')
#     flood_collection_after = s2_winter.filterDate('2018-01-16', '2018-02-10')
    
#     # 检查集合是否为空
#     if flood_collection_before.size().getInfo() > 0:
#         flood_before = flood_collection_before.select('NDVI').median().clip(roi)
#     else:
#         print("警告: 洪水前期数据为空，使用空白影像")
#         flood_before = ee.Image(0).rename('NDVI').clip(roi)
    
#     if flood_collection_during.size().getInfo() > 0:
#         flood_during = flood_collection_during.select('NDVI').median().clip(roi)
#     else:
#         print("警告: 洪水期间数据为空，使用空白影像")
#         flood_during = ee.Image(0).rename('NDVI').clip(roi)
    
#     if flood_collection_after.size().getInfo() > 0:
#         flood_after = flood_collection_after.select('NDVI').median().clip(roi)
#     else:
#         print("警告: 洪水后期数据为空，使用空白影像")
#         flood_after = ee.Image(0).rename('NDVI').clip(roi)
    
#     # 设置属性
#     drought_before = drought_before.set({'event': 'drought', 'stage': 'before'})
#     drought_during = drought_during.set({'event': 'drought', 'stage': 'during'})
#     drought_after = drought_after.set({'event': 'drought', 'stage': 'after'})
    
#     flood_before = flood_before.set({'event': 'flood', 'stage': 'before'})
#     flood_during = flood_during.set({'event': 'flood', 'stage': 'during'})
#     flood_after = flood_after.set({'event': 'flood', 'stage': 'after'})
    
#     print("干扰事件数据提取完成")
    
#     return {
#         'drought': {
#             'before': drought_before,
#             'during': drought_during,
#             'after': drought_after
#         },
#         'flood': {
#             'before': flood_before,
#             'during': flood_during,
#             'after': flood_after
#         }
#     }

In [7]:
# 5. 数据导出函数
# def export_to_drive(image, description, parent_folder, subfolder, scale=30):
#     """将影像导出到Google Drive"""
    
#     # 获取研究区域边界
#     region = roi.bounds().getInfo()['coordinates']
    
#     # 确保影像有波段
#     if image.bandNames().size().getInfo() == 0:
#         print(f"warning: the image {description} has no bands, try to add the default band...")
#         image = image.select([0]).rename('NDVI')
    
#     # 创建导出任务
#     export_task = ee.batch.Export.image.toDrive(
#         image=image,
#         description=f"{subfolder}_{description}",  
#         folder=parent_folder,  
#         scale=scale,
#         region=region,
#         maxPixels=1e13,
#         fileFormat='GeoTIFF',
#         formatOptions={'cloudOptimized': True}
#     )
    
#     # 启动任务
#     export_task.start()
#     print(f"export task submitted: {subfolder}_{description} → {parent_folder}")
    
#     return export_task

In [11]:
def download_original_images(start_year=2018, end_year=2020, parent_folder='GEE_Original'):
    """下载2018-2020年的年度卫星原始波段数据
    
    Args:
        start_year: 起始年份，默认2018
        end_year: 结束年份，默认2020
        parent_folder: Google Drive中的父文件夹名称
    """
    
    print(f"开始下载{start_year}-{end_year}年度卫星原始数据...")
    
    # 获取Sentinel-2数据
    sentinel = get_sentinel2_collection(start_year, end_year)
    
    # 按年份分组导出
    for year in range(start_year, end_year + 1):
        # 提取每年生长季数据
        start_date = ee.Date.fromYMD(year, 5, 1)  # 5月1日
        end_date = ee.Date.fromYMD(year, 9, 30)   # 9月30日
        
        # 从当年生长季筛选一张云量最少的图像
        year_collection = sentinel.filterDate(start_date, end_date)
        best_image = year_collection.sort('CLOUDY_PIXEL_PERCENTAGE').first()
        
        # 获取图像的日期作为标识
        image_date = best_image.get('date').getInfo()
        
        print(f"导出{year}年度最佳原始图像，日期: {image_date}")
        
        # 导出任务
        export_to_drive(
            image=best_image,
            description=f'original_{year}_{image_date}',
            subfolder='Original',
            parent_folder=parent_folder,
            scale=10  # Sentinel-2用10m分辨率
        )
    
    print("\n所有导出任务已提交，请在 https://code.earthengine.google.com/tasks 查看任务进度")
    print(f"下载完成后，文件将保存在您的Google Drive {parent_folder} 文件夹中")
    


In [9]:
download_original_images(2018, 2020, 'GEE_Original')

start to test the download of satellite data of 2018-2020...
get the Sentinel-2 data: 2018-2020



Attention required for COPERNICUS/S2_SR! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR



Sentinel-2 images amount: 281
processing Sentinel-2 images...
Sentinel-2 data processing completed, 281 images
create the NDVI composite products of year scale...
completed, 3 composite products created
start to export the annual NDVI example...
export task submitted: NDVI_annual_2018 → GEE
export task submitted: NDVI_annual_2019 → GEE
export task submitted: NDVI_annual_2020 → GEE

all test export tasks submitted, please check the task progress at https://code.earthengine.google.com/tasks
successfully downloaded, the files will be saved in the subfolder of your Google Drive GEE
