In [5]:
import ee
from google.oauth2 import service_account
import numpy as np
import pandas as pd
from masks import mask_MODIS_clouds, MODIS_Mask_QC, mask_s2_clouds, mask_s2_clouds_collection


# Path to the private key file
key_path = 'Access/ee-martinparker637-e68fde65abb4.json'

# Load the service account credentials
credentials = service_account.Credentials.from_service_account_file(
    key_path,
    scopes=['https://www.googleapis.com/auth/earthengine'])

# Initialize Earth Engine with the service account credentials
ee.Initialize(credentials)

In [2]:
root_directory = ''#'home/users/wlwc1989/phenology_dwd/'
#root_directory = 'C:\\Users\\wlwc1989\\Documents\\Phenology_Test_Notebooks\\phenology_dwd\\'

In [3]:
coords = np.loadtxt(root_directory + "Saved_files/station_coords.csv", delimiter=',')

In [4]:
#coords = np.loadtxt("C:\\Users\\wlwc1989\\Documents\\Phenology_Test_Notebooks\\phenology_dwd\\Saved_files\\station_coords.csv", delimiter=',')

In [6]:
def satellite_data_at_coords(coords, start_date = '2000-01-01', end_date = '2022-12-31', instrument = "COPERNICUS/S2_SR_HARMONIZED", bands = ['B4', 'B8'], box_width = 0.002, pixel_scale = 500, QC_function = mask_s2_clouds):
    for coord_index, coord in enumerate(coords):
        print(coord_index)
        location = ee.Geometry.BBox(coord[1] - box_width, coord[0] - box_width, coord[1] + box_width, coord[0] + box_width) #ee.Geometry.Point(coord[:2])#
        dataset = ee.ImageCollection(instrument)
        dataset = QC_function(dataset)
        filtered_dataset = dataset.filterDate(start_date, end_date).filterBounds(location)
        time_series_data = filtered_dataset.select(*bands).map(lambda img: img.set('median', img.reduceRegion(ee.Reducer.median(), location , pixel_scale)))#.getInfo()#.getRegion(location, pixel_scale).getInfo()#.getRegion(location, 30).getInfo()#('B4')#.median()#.get('B4')#reduceColumns(ee.Reducer.median().setOutputs(['median']), [''])#.get('B4')
        timelist = time_series_data.aggregate_array('system:time_start').getInfo()
        print('times retrieved')
        bandlist = time_series_data.aggregate_array('median').getInfo()
        print('bands retrieved')
        dataset = {'Time': timelist,
                   'lat': [coord[0] for count in range(len(timelist))],
                   'lon': [coord[1] for count in range(len(timelist))],
                   'Stations_Id': [np.int64(coord[2]) for count in range(len(timelist))]
                   }
        for band in bands:
            dataset[f'Median {band}'] = [band_data[band] for band_data in bandlist]
        df = pd.DataFrame(dataset)
        df['formatted_time'] = pd.to_datetime(df['Time'], unit='ms').dt.strftime('%Y-%m-%d-%H-%M-%S')
        if coord_index == 0:
            df_full = df
        else:
            df_full = pd.concat([df_full, df])
    return df_full

In [8]:
df = satellite_data_at_coords(coords[:2], start_date='2020-10-01', instrument = "MODIS/061/MOD09A1", QC_function = lambda IC: IC, bands = [f'sur_refl_b0{n}' for n in range(1, 5)]) #"MODIS/061/MOD09GA"

0
times retrieved
bands retrieved
1
times retrieved
bands retrieved


In [None]:
df = satellite_data_at_coords(coords[:2], start_date='2020-10-01', instrument = "MODIS/061/MOD09A1", QC_function = lambda IC: IC.map(MODIS_Mask_QC).map(mask_MODIS_clouds), bands = [f'sur_refl_b0{n}' for n in range(1, 5)]) #"MODIS/061/MOD09GA"

In [9]:
df

Unnamed: 0,Time,lat,lon,Stations_Id,Median sur_refl_b01,Median sur_refl_b02,Median sur_refl_b03,Median sur_refl_b04,formatted_time
0,1602028800000,54.7833,9.4333,7501,1043.0,2177.0,972.0,1081.0,2020-10-07-00-00-00
1,1602720000000,54.7833,9.4333,7501,365.0,998.0,244.0,334.0,2020-10-15-00-00-00
2,1603411200000,54.7833,9.4333,7501,598.0,2037.0,386.0,540.0,2020-10-23-00-00-00
3,1604102400000,54.7833,9.4333,7501,886.0,1758.0,576.0,816.0,2020-10-31-00-00-00
4,1604793600000,54.7833,9.4333,7501,1120.0,2199.0,748.0,1124.0,2020-11-08-00-00-00
...,...,...,...,...,...,...,...,...,...
98,1669334400000,54.4000,10.1500,7502,1922.0,2622.0,1994.0,2062.0,2022-11-25-00-00-00
99,1670025600000,54.4000,10.1500,7502,6277.0,8119.0,5382.0,6108.0,2022-12-03-00-00-00
100,1670716800000,54.4000,10.1500,7502,2475.0,3189.0,2573.0,2560.0,2022-12-11-00-00-00
101,1671408000000,54.4000,10.1500,7502,644.0,1713.0,756.0,791.0,2022-12-19-00-00-00


In [69]:
def mask_s2_clouds(image):
    """Masks clouds in a Sentinel-2 image using the QA band.

    Args:
        image (ee.Image): A Sentinel-2 image.

    Returns:
        ee.Image: A cloud-masked Sentinel-2 image.
    """
    qa = image.select('QA60')

    # Bits 10 and 11 are clouds and cirrus, respectively.
    cloud_bit_mask = 1 << 10
    cirrus_bit_mask = 1 << 11

    # Both flags should be set to zero, indicating clear conditions.
    mask = (
        qa.bitwiseAnd(cloud_bit_mask)
        .eq(0)
        .And(qa.bitwiseAnd(cirrus_bit_mask).eq(0))
    )

    return image.updateMask(mask)


In [70]:
def mask_MODIS_clouds(image):
    """Masks clouds in a Sentinel-2 image using the stateQA band.

    Args:
        image (ee.Image): A Sentinel-2 image.

    Returns:
        ee.Image: A cloud-masked Sentinel-2 image.
    """
    qa = image.select('state_1km')
    # Bits 0-1 are cloud, 2 cloud shadow, 8-9 cirrus
    cloud_bit_mask = 3 << 0
    #cloud_bit_mask2 = 1 << 1
    cloud_shadow_bit_mask = 1 << 2
    cirrus_bit_mask = 3 << 8
    #cirrus_bit_mask2 = 1 << 9
    mask = (
        qa.bitwiseAnd(cloud_bit_mask).eq(0)
        .And(qa.bitwiseAnd(cirrus_bit_mask).eq(0)
             .And(qa.bitwiseAnd(cloud_shadow_bit_mask).eq(0)
                 )
            )
    )
    return image.updateMask(mask)

In [85]:
def MODIS_Mask_QC(image):
    """Masks clouds in a Sentinel-2 image using the stateQA band.

    Args:
        image (ee.Image): A Sentinel-2 image.

    Returns:
        ee.Image: A cloud-masked Sentinel-2 image.
    """
    qa = image.select('QC_500m')
    # Bits 0-1 are cloud, 2 cloud shadow, 8-9 cirrus
    b1_mask = 15 << 2
    b2_mask = 15 << 6
    b3_mask = 15 << 10
    b4_mask = 15 << 14
    mask = (
        qa.bitwiseAnd(b1_mask).eq(0)
        .And(qa.bitwiseAnd(b2_mask).eq(0)
             .And(qa.bitwiseAnd(b3_mask).eq(0)
                  .And(qa.bitwiseAnd(b4_mask).eq(0)
                      )
                 )
            )
    )
    return image.updateMask(mask)

In [72]:
def mask_s2_clouds_collection(image_collection):
    """Masks clouds in a Sentinel-2 image collection using the SLC band.

    Args:
        image (ee.ImageCollection): A Sentinel-2 image collection.

    Returns:
        ee.Image: A cloud-masked Sentinel-2 image collection.
    """
    return image_collection.map(mask_s2_clouds)

In [88]:
df#.dropna()

Unnamed: 0,Time,lat,lon,Stations_Id,Median sur_refl_b01,Median sur_refl_b02,Median sur_refl_b03,Median sur_refl_b04,formatted_time
0,1601510400000,54.7833,9.4333,7501,5123.0,5745.0,4784.0,4979.0,2020-10-01-00-00-00
1,1601596800000,54.7833,9.4333,7501,4805.0,5016.0,5169.0,4898.0,2020-10-02-00-00-00
2,1601683200000,54.7833,9.4333,7501,8798.0,8922.0,8992.0,8906.0,2020-10-03-00-00-00
3,1601769600000,54.7833,9.4333,7501,198.0,649.0,146.0,197.0,2020-10-04-00-00-00
4,1601856000000,54.7833,9.4333,7501,1693.0,2471.0,1825.0,1778.0,2020-10-05-00-00-00
...,...,...,...,...,...,...,...,...,...
804,1672012800000,54.4000,10.1500,7502,4882.0,4813.0,5753.0,5038.0,2022-12-26-00-00-00
805,1672099200000,54.4000,10.1500,7502,637.0,2581.0,376.0,697.0,2022-12-27-00-00-00
806,1672185600000,54.4000,10.1500,7502,7950.0,8195.0,7880.0,7983.0,2022-12-28-00-00-00
807,1672272000000,54.4000,10.1500,7502,7946.0,8172.0,7097.0,7791.0,2022-12-29-00-00-00


In [10]:
coords[:3]

array([[  54.7833,    9.4333, 7501.    ],
       [  54.4   ,   10.15  , 7502.    ],
       [  54.3167,   10.05  , 7504.    ]])

In [18]:
df.to_csv(root_directory + 'Saved_files/sat_data0.csv')

In [22]:
fig, ax = plt.subplots()
for station in df['Stations_Id'].unique():
    df_reduced = df.where(df['Stations_Id'] == station).dropna()
    plt.plot(df_reduced['formatted_time'], df_reduced['Median B4'])

SyntaxError: expected ':' (1440415578.py, line 2)

In [47]:
#location = ee.Geometry.Point([10.05, 54.3167]) #(lon, lat)
Instrument = "MODIS/061/MOD09GA"
#Instrument = "MODIS/061/MOD09GQ"
#Instrument = "COPERNICUS/S2_SR_HARMONIZED"
coord = [54.3167, 10.05, 7504.0]
box_width = 0.002

location = ee.Geometry.BBox(8, 48, 15, 58)#ee.Geometry.BBox(coord[1] - box_width, coord[0] - box_width, coord[1] + box_width, coord[0] + box_width)
# Define the satellite imagery dataset (e.g., Landsat 8)
dataset = ee.ImageCollection(Instrument)

# Filter the dataset by location and date range
filtered_dataset = dataset.filterBounds(location).filterDate('2000-01-01', '2022-12-31')

In [52]:
filtered_dataset = filtered_dataset.map(MODIS_Mask_QC).map(mask_MODIS_clouds)

In [53]:
# Define a function to calculate NDVI
def calculate_ndvi(image, band1 = 'B4', band2 = 'B8'):
    ndvi = image.normalizedDifference([band1, band2]).rename('NDVI')
    return image.addBands(ndvi)

def get_median(image):
    return image.set('median', image.reduceRegion(ee.Reducer.median(), location , pixel_scale))

# Map the NDVI calculation function over the filtered dataset
ndvi_dataset = filtered_dataset.map(lambda image: calculate_ndvi(image, band1 = 'sur_refl_b01', band2 = 'sur_refl_b02'))

In [54]:
location = ee.Geometry.BBox(coord[1] - box_width, coord[0] - box_width, coord[1] + box_width, coord[0] + box_width) #ee.Geometry.Point(coord[:2])#
ndvi_dataset = ndvi_dataset.filterBounds(location)

In [27]:
ndvi_dataset = ndvi_dataset.map(mask_MODIS_clouds)

In [None]:
pixel_scale = 500
bands = [f'sur_refl_b0{n}' for n in range(1, 5)]
bands.append('NDVI')
time_series_data = ndvi_dataset.select(bands).map(get_median)#.getInfo()#.getRegion(location, pixel_scale).getInfo()#.getRegion(location, 30).getInfo()#('B4')#.median()#.get('B4')#reduceColumns(ee.Reducer.median().setOutputs(['median']), [''])#.get('B4')
timelist = time_series_data.aggregate_array('system:time_start').getInfo()
print('times sorted')
bandlist = time_series_data.aggregate_array('median').getInfo()
print('bands sorted')
#QC_check = time_series_data.aggregate_array('state_1km').getInfo()
#print('QC sorted')
dataset = {'Time': timelist,
            'lat': [coord[0] for count in range(len(timelist))],
            'lon': [coord[1] for count in range(len(timelist))],
            'Stations_Id': [np.int64(coord[2]) for count in range(len(timelist))]
            }

times sorted


In [62]:
[f'sur_refl_b0{n}' for n in range(1, 5)].append(['NDVI'])

In [63]:
bands = [f'sur_refl_b0{n}' for n in range(1, 5)]
bands.append('NDVI')
for band in bands:
    dataset[f'Median {band}'] = [band_data[band] for band_data in bandlist]
df = pd.DataFrame(dataset)
df['formatted_time'] = pd.to_datetime(df['Time'], unit='ms').dt.strftime('%Y-%m-%d-%H-%M-%S')

KeyError: 'sur_refl_b01'

In [33]:
time_series_data = ndvi_dataset.select('NDVI', 'state_1km')#.map(get_median)
QC_check = time_series_data.aggregate_array('state_1km').getInfo()

In [22]:
df.dropna()

Unnamed: 0,Time,lat,lon,Stations_Id,Median NDVI,formatted_time
73,957657600000,54.3167,10.05,7504,-0.668035,2000-05-07-00-00-00
82,958435200000,54.3167,10.05,7504,-0.631034,2000-05-16-00-00-00
168,966902400000,54.3167,10.05,7504,-0.532244,2000-08-22-00-00-00
289,977356800000,54.3167,10.05,7504,-0.385653,2000-12-21-00-00-00
310,979171200000,54.3167,10.05,7504,-0.499356,2001-01-11-00-00-00
...,...,...,...,...,...,...
8122,1657929600000,54.3167,10.05,7504,-0.608215,2022-07-16-00-00-00
8125,1658188800000,54.3167,10.05,7504,-0.576904,2022-07-19-00-00-00
8147,1660089600000,54.3167,10.05,7504,-0.582211,2022-08-10-00-00-00
8218,1667260800000,54.3167,10.05,7504,-0.550582,2022-11-01-00-00-00


In [13]:
time_series_data = ndvi_dataset.select('NDVI').getRegion(location, 30).getInfo() 

EEException: ImageCollection.getRegion: Too many values: 963824480 points x 1 bands x 8278 images > 1048576.

In [58]:
coords[:10, :2]

array([[ 9.4333, 54.7833],
       [10.15  , 54.4   ],
       [10.05  , 54.3167],
       [10.6667, 53.8833],
       [10.7   , 53.85  ],
       [ 9.9833, 54.0833],
       [ 9.7   , 54.4333],
       [ 9.9833, 54.6167],
       [ 9.9667, 54.4   ],
       [ 9.85  , 54.4   ]])

In [63]:
#points = ee.Geometry.MultiPoint(coords[:2, :2].tolist())
location = ee.Geometry.Point([10.05, 54.3167])
time_series_data = filtered_dataset.select('B4', 'B8').getRegion(location, 30).getInfo()   #location, 30).getInfo()#

In [40]:
loc_climates = filtered_dataset.reduceRegions(points, ee.Reducer.first())

AttributeError: 'ImageCollection' object has no attribute 'reduceRegions'

In [None]:
filtered_dataset.reduce()

In [23]:
ds = pd.dataframe(time_series_data[1:, :], columns = time_series_data[0, :])

NameError: name 'pd' is not defined

In [30]:
time_series_data = np.array(time_series_data)

In [None]:
time_series_data

In [24]:
print(len(np.unique(np.array(time_series_data)[:, 3])), len(np.array(time_series_data)[:, 3]))

781 163801


In [6]:
# Get the time series data for the location
time_series_data = ndvi_dataset.getRegion(location, 30).getInfo()


EEException: Expected a homogeneous image collection, but an image with incompatible bands was encountered:
  First image type: 27 bands ([B1, B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12, AOT, WVP, SCL, TCI_R, TCI_G, TCI_B, MSK_CLDPRB, MSK_SNWPRB, QA10, QA20, QA60, MSK_CLASSI_OPAQUE, MSK_CLASSI_CIRRUS, MSK_CLASSI_SNOW_ICE, NDVI]).
Current image type: 27 bands ([B1, B2, B3, B4, B5, B6, B7, B8, B8A, B9, B11, B12, AOT, WVP, SCL, TCI_R, TCI_G, TCI_B, QA10, QA20, QA60, MSK_CLASSI_OPAQUE, MSK_CLASSI_CIRRUS, MSK_CLASSI_SNOW_ICE, MSK_CLDPRB, MSK_SNWPRB, NDVI]).
          Image ID: 20170503T104021_20170503T104024_T32UNF
Some bands might require explicit casts.

In [None]:

# Extract the time property separately and format it
time_values = [item[0] for item in time_series_data[1:]]
time_values = pd.to_datetime([item.split('_')[-1] for item in time_values], format='%Y%m%d')
# Create a pandas DataFrame from the time series data (excluding time)
df = pd.DataFrame([item[1:] for item in time_series_data[1:]], columns=time_series_data[0][1:])
df['time'] = time_values