In [1]:
import ee
import geemap
import xml.etree.ElementTree as ET
import ipywidgets as widgets
import pandas as pd 
from pprint import pprint 

In [2]:
ee.Authenticate()

True

In [3]:
ee.Initialize(project="jameswilliamchamberlain")

In [4]:
# collect Samarra Pin

# UNESCO World Heritage Sites List xml file 
tree = ET.parse("whs.xml")
root = tree.getroot()

data = []

for row in root.findall("row"):
    # findtext() returns None when the tag is absent; guard before splitting so
    # a row without <iso_code> yields an empty list instead of raising.
    iso = row.findtext('iso_code')
    data.append([
        row.findtext('site'),
        row.findtext('category'),
        row.findtext('date_inscribed'),
        row.findtext('region'),
        row.findtext('http_url'),
        row.findtext('longitude'),
        row.findtext('latitude'),
        iso.split(',') if iso else [],
    ])

df = pd.DataFrame(data, columns=["name", "category", "date inscribed", "region", "url", "longitude", "latitude", "iso"])

# Coordinates arrive as text; anything unparsable becomes NaN and is dropped below.
df["longitude"] = pd.to_numeric(df["longitude"], errors='coerce')
df["latitude"] = pd.to_numeric(df["latitude"], errors='coerce')

# drop empties 
df = df.dropna(subset=["longitude", "latitude"]).reset_index(drop=True) 

# filter out everything but Samarra 
target_url = 'https://whc.unesco.org/en/list/276'
df_disp = df[df['url'] == target_url].reset_index(drop=True)

In [5]:
# Interactive map; later cells and helper functions reach it through the global `m`.
m = geemap.Map()
# Add the rows of df_disp as points, reading coordinates from its longitude/latitude columns.
m.add_points_from_xy(df_disp, x="longitude", y="latitude", layer_name="Sites")

In [6]:
# util functions and widgets
def map_polygon(polygon, collection_name, layer_name, yyyymmdd1="2024-01-01", yyyymmdd2="2024-12-29", num_tasks=10):
    """
        Add a clipped median composite of an image collection to the global map `m`.

        Args:
            polygon: Earth Engine geometry / feature collection used both to filter
                the collection spatially and to clip the composite.
            collection_name (str): Earth Engine image collection id.
            layer_name (str): Name of the layer added to the map.
            yyyymmdd1 (str): Start of the date filter.
            yyyymmdd2 (str): End of the date filter.
            num_tasks (int): Not used by this function.
    """
    # Only scenes reporting less than 20% cloudy pixels contribute to the composite.
    low_cloud = ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20)

    scenes = ee.ImageCollection(collection_name)
    scenes = scenes.filterDate(yyyymmdd1, yyyymmdd2)
    scenes = scenes.filterBounds(polygon)
    scenes = scenes.filter(low_cloud)

    composite = scenes.median().clip(polygon)

    # Bands B4/B3/B2 stretched over 0..3000.
    rgb_vis = {'min': 0, 'max': 3000, 'bands': ['B4', 'B3', 'B2']}

    m.addLayer(composite, rgb_vis, layer_name)

def draw_rectangles_for_date(date_val, size_deg=1):
    """
        Map a first-quarter Sentinel-2 composite around every site in `df_disp`.

        A square of `size_deg` degrees is centred on each row's longitude/latitude;
        the squares are combined into one feature collection and handed to
        `map_polygon`.

        Args:
            date_val: Year used to build the date window and the layer name.
            size_deg: Side length of each square, in degrees.
    """
    half = size_deg / 2
    regions = []

    for _, row in df_disp.iterrows():
        lat = row['latitude']
        lon = row['longitude']

        # Rectangle corners are [west, south, east, north].
        regions.append(ee.Geometry.Rectangle([lon - half, lat - half, lon + half, lat + half]))

    regions_collection = ee.FeatureCollection(regions)

    # Zero-padded so both ends of the window are ISO-8601 dates (the end used to
    # be "<year>-3-31"). NOTE(review): filterDate treats its end as exclusive, so
    # 31 March itself is not included - confirm whether that is intended.
    map_polygon(regions_collection, "COPERNICUS/S2", f"S2 {date_val}", f"{date_val}-01-01", f"{date_val}-03-31")

def draw_basic(b):
    """
        Button callback: draw the per-site composites for a fixed set of years.

        Args:
            b: The clicked button (unused).
    """
    # Denser alternatives:
    #   (2025, 2024, 2022, 2020, 2018, 2016)
    #   (2025, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016)
    for year in (2025, 2020, 2016):
        draw_rectangles_for_date(year)

def draw_selected(b):
    """
        Button callback: map a composite over the drawn features for the window
        chosen with the year / month / timeframe sliders.

        Args:
            b: The clicked button (unused).

        Raises:
            ValueError: If nothing has been drawn on the map.
    """
    # Truthiness covers both None and an empty feature list.
    if not m.draw_features:
        raise ValueError("Error: no roi")

    # roi = m.draw_last_feature.geometry()
    roi = ee.FeatureCollection(m.draw_features)

    year_start_val = int(date_slider.value)
    month_start_val = int(month_slider.value)
    time_frame_val = int(time_frame_slider.value)

    # Sliders allow at most 12 + 12 months, so a single year of overflow is enough.
    month_end_val = month_start_val + time_frame_val
    year_overflow = 0
    if month_end_val > 12:
        month_end_val -= 12
        year_overflow = 1

    # Full zero-padded ISO dates anchored on the first of the month.
    yyyymmdd_start = f"{year_start_val}-{month_start_val:02d}-01"
    yyyymmdd_end = f"{year_start_val + year_overflow}-{month_end_val:02d}-01"

    map_polygon(roi, "COPERNICUS/S2_HARMONIZED", yyyymmdd1=yyyymmdd_start, yyyymmdd2=yyyymmdd_end, layer_name=f"S2 {yyyymmdd_start} to {yyyymmdd_end}")

# Define Sliders 
# Settings shared by all three sliders; only options, initial value and label differ.
_slider_common = dict(
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
)

# Year of the start of the window.
date_slider = widgets.SelectionSlider(
    options=[str(year) for year in range(2016, 2026)],
    value='2020',
    description='Date:',
    **_slider_common
)

# Month (1-12) of the start of the window.
month_slider = widgets.SelectionSlider(
    options=[str(month) for month in range(1, 13)],
    value='1',
    description='Month:',
    **_slider_common
)

# Length of the window in months (1-12).
time_frame_slider = widgets.SelectionSlider(
    options=[str(month) for month in range(1, 13)],
    value='3',
    description='timeframe:',
    **_slider_common
)


In [7]:
# Buttons that trigger the two drawing callbacks defined earlier.
# NOTE(review): `position` is passed to widgets.Button here as well as to
# m.add_widget below - confirm the Button keyword has any effect.
render_btn = widgets.Button(description="Draw Selected Date Layer", position="bottomright") 
draw_btn = widgets.Button(description="Draw Pins Areas 3yrs", position="bottomright")

# Stack both buttons and place them on the map itself.
widget_draw = widgets.VBox([draw_btn, render_btn])

m.add_widget(widget_draw, position="bottomright")

# The sliders are shown outside the map together with render_btn, so the same
# button object appears both on the map and in this row.
display(widgets.HBox([date_slider, month_slider, time_frame_slider, render_btn]))
# Hook the callbacks up: render_btn -> draw_selected, draw_btn -> draw_basic.
render_btn.on_click(draw_selected)
draw_btn.on_click(draw_basic)

# Last expression of the cell: renders the map.
m

HBox(children=(SelectionSlider(continuous_update=False, description='Date:', index=4, options=('2016', '2017',…

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [8]:
# show a GEE polygon above 
# load from asset 
# asset_id = https://code.earthengine.google.com/?asset=projects/jameswilliamchamberlain/assets/test/test_samarra_polygon_01
# The same asset path is used later in the notebook as the export region.
asset_polygon = ee.FeatureCollection("projects/jameswilliamchamberlain/assets/test/test_samarra_polygon_01")

# add the polygon to the map (empty dict = no explicit visualisation parameters)
m.addLayer(asset_polygon, {}, "Samarra Polygon")    


# Useful Links 

[Google Earth Engine Editor](https://code.earthengine.google.com/)

[GEE Tasks (Bulk cancel mode)](https://code.earthengine.google.com/tasks)

In [9]:
def export_to_gee(polygon, folder_name="test", name="polygon_test"):
    """
        Start a task that saves a polygon as an Earth Engine table asset.

        Args:
            polygon (ee.Geometry): The polygon to export. (Note: must be a single ee.Geometry)
            folder_name (str): Asset folder, below the project's assets, that receives the table.
            name (str): Asset name; also used as the task description.

        Returns:
            The id of the started task, taken from its status.
    """
    asset_id = f"projects/jameswilliamchamberlain/assets/{folder_name}/{name}"
    # The table exporter takes a collection, so the polygon is wrapped in one.
    wrapped = ee.FeatureCollection([polygon])

    export_task = ee.batch.Export.table.toAsset(
        collection=wrapped,
        description=name,
        assetId=asset_id,
    )
    export_task.start()

    # Returning the id lets callers look up this specific task later instead of
    # scanning every task.
    return export_task.status()['id']

In [10]:
# from tile import tile 

# tiler = tile()
# tiler.version()

# year_start = 2017
# year_end = 2025
# month_step = 1


# # polygon 
# polygon = m.draw_last_feature.geometry() if m.draw_features else None
# export_id = None

# if polygon is not None:
#     print(f"Polygon: {polygon.getInfo()}")

#     # save to gee
#     export_id = export_to_gee(polygon, folder_name="test", name="test_samarra_polygon_01")

#     print(f"Exported polygon to GEE asset: {export_id}")

# else:
#     raise ValueError("Please draw a polygon on the map above or import your own polygon in place of #polygon defined above.")

In [11]:
# check status 
def check_task_status(task_id):
    """
        Look up the status of an Earth Engine task from its id.

        Returns whatever ee.data.getTaskStatus returns; callers in this
        notebook index it with [0] and read the 'state' key.
    """
    # getTaskStatus is deprecated but easier to work with.
    # print(f"Task ID: {task_id} status is {status[0]['state']}")
    return ee.data.getTaskStatus(task_id)

# check_task_status(task_id=export_id)

In [5]:
import ee
# ee.Authenticate()
ee.Initialize(project="jameswilliamchamberlain")

def export_s2_with_ndvi_to_drive_from_asset(
    asset_path,
    start_date,
    end_date,
    filename="s2_ndvi_",
    folder="exported_assets"
):
    """
        Start a Google Drive export of a Sentinel-2 median composite with an
        added NDVI band, clipped to the bounds of a polygon asset.

        Args:
            asset_path (str): Earth Engine table asset holding the region polygon.
            start_date (str): Start of the date filter.
            end_date (str): End of the date filter. filterDate treats the end as
                exclusive, so pass the day after the last day you want included.
            filename (str): Task description and Drive file name prefix.
            folder (str): Drive folder that receives the GeoTIFF.

        Returns:
            The id of the started export task, taken from its status.
    """
    # Load polygon from asset
    region = ee.FeatureCollection(asset_path).geometry()

    # Load and process Sentinel-2 imagery
    # From 2018 onward the cloud mask seems to differ: before that, clouds are
    # reported that do not exist, so the cloud percentage threshold is kept low.
    # s2 = ee.ImageCollection("COPERNICUS/S2_SR") \
    #     .filterDate(start_date, end_date) \
    #     .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)) \
    #     .filterBounds(region) \
    #     .median()

    # NOTE(review): if no scene passes the filters the median presumably has no
    # bands and the export task fails - confirm against the failed tasks.
    s2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") \
        .filterDate(start_date, end_date) \
        .filterBounds(region) \
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)) \
        .median()

    # Compute NDVI
    # shortcut fn https://developers.google.com/earth-engine/tutorials/tutorial_api_06
    ndvi = s2.normalizedDifference(['B8', 'B4']).rename('NDVI')
    # Cast every band to float so the original bands and NDVI share one type.
    image_with_ndvi = s2.addBands(ndvi).toFloat()

    # Create export task to Drive.
    # Export the image that carries the NDVI band; previously the plain
    # composite `s2` was exported and the NDVI band was silently dropped.
    task = ee.batch.Export.image.toDrive(
        image=image_with_ndvi,
        description=filename,
        folder=folder,
        fileNamePrefix=filename,
        region=region.bounds().getInfo()['coordinates'],
        scale=10,
        maxPixels=1e13,
        fileFormat='GeoTIFF'
    )

    task.start()
    return task.status()['id']


# TEST 

# task_id = export_s2_with_ndvi_to_drive_from_asset(
#     asset_path="projects/jameswilliamchamberlain/assets/test/test_samarra_polygon_01",
#     start_date="2020-01-01",
#     end_date="2020-02-29",
#     filename="s2_ndvi_samarra",
#     folder="samarra_exports"
# )

In [13]:
# check_task_status(task_id=task_id)

# raise ValueError("BREAKS")

In [6]:
# get start and end date for each month 2017 to present 

from datetime import datetime, timedelta

def get_month_start_end(year, month):
    """
        Return the first and last calendar day of a month as 'YYYY-MM-DD' strings.
    """
    first_day = datetime(year, month, 1)

    # The last day is one day before the first day of the following month;
    # December rolls over into January of the next year.
    next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
    last_day = datetime(next_year, next_month, 1) - timedelta(days=1)

    fmt = '%Y-%m-%d'
    return first_day.strftime(fmt), last_day.strftime(fmt)

def get_monthly_date_ranges(start_year, end_year):
    """
        Build (start, end) date pairs for every month of every year from
        start_year through end_year, both inclusive, in chronological order.
    """
    return [
        get_month_start_end(year, month)
        for year in range(start_year, end_year + 1)
        for month in range(1, 13)
    ]

# Monthly date ranges for every month of 2016 through 2025 (both years inclusive).
# Earlier variant, 2017 up to the current year:
# monthly_date_ranges = get_monthly_date_ranges(2017, datetime.now().year)
monthly_date_ranges = get_monthly_date_ranges(2016, 2025)

# Print the date ranges (one line per month)
for start_date, end_date in monthly_date_ranges:
    print(f"Start: {start_date}, End: {end_date}")

Start: 2016-01-01, End: 2016-01-31
Start: 2016-02-01, End: 2016-02-29
Start: 2016-03-01, End: 2016-03-31
Start: 2016-04-01, End: 2016-04-30
Start: 2016-05-01, End: 2016-05-31
Start: 2016-06-01, End: 2016-06-30
Start: 2016-07-01, End: 2016-07-31
Start: 2016-08-01, End: 2016-08-31
Start: 2016-09-01, End: 2016-09-30
Start: 2016-10-01, End: 2016-10-31
Start: 2016-11-01, End: 2016-11-30
Start: 2016-12-01, End: 2016-12-31
Start: 2017-01-01, End: 2017-01-31
Start: 2017-02-01, End: 2017-02-28
Start: 2017-03-01, End: 2017-03-31
Start: 2017-04-01, End: 2017-04-30
Start: 2017-05-01, End: 2017-05-31
Start: 2017-06-01, End: 2017-06-30
Start: 2017-07-01, End: 2017-07-31
Start: 2017-08-01, End: 2017-08-31
Start: 2017-09-01, End: 2017-09-30
Start: 2017-10-01, End: 2017-10-31
Start: 2017-11-01, End: 2017-11-30
Start: 2017-12-01, End: 2017-12-31
Start: 2018-01-01, End: 2018-01-31
Start: 2018-02-01, End: 2018-02-28
Start: 2018-03-01, End: 2018-03-31
Start: 2018-04-01, End: 2018-04-30
Start: 2018-05-01, E

In [9]:
# Asset Directory
asset_dir = "projects/jameswilliamchamberlain/assets/test/test_samarra_polygon_01"

# Ids of every submitted export task, consumed by the polling cell below.
task_ids = []

for start_date, end_date in monthly_date_ranges:
    # monthly_date_ranges holds the last calendar day of each month, but the
    # export filters with filterDate, whose end is exclusive. Pass the following
    # day so imagery from the final day of the month is not dropped.
    end_exclusive = (datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")

    task_id = export_s2_with_ndvi_to_drive_from_asset(
        asset_path=asset_dir,
        start_date=start_date,
        end_date=end_exclusive,
        # The file name keeps the inclusive month range for readability.
        filename=f"s2_ndvi_samarra_{start_date}_{end_date}",
        folder="samarra_exports_l2_neu"
    )
    task_ids.append(task_id)

In [None]:
import time

# Tasks that ended without completing; filled in by the loop below.
failed_tasks = []

# States after which a task will not complete. 'CANCELING' is kept from the
# original check; 'CANCEL_REQUESTED' is the name used by ee.batch.Task.State.
unsuccessful_states = {'FAILED', 'CANCELLED', 'CANCEL_REQUESTED', 'CANCELING'}

# Poll until every task has finished. The previous condition
# `len(task_ids) is not []` compared an int with a list by identity, which is
# always True, so the loop could never end.
while task_ids:
    # Iterate over a copy: removing from the list being iterated skips entries.
    for task_id in list(task_ids):
        status = check_task_status(task_id=task_id)[0]
        state = status['state']

        if state == 'COMPLETED':
            task_ids.remove(task_id)
            print(f"Task {task_id} completed successfully. {len(task_ids)} tasks remaining.")
        elif state in unsuccessful_states:
            failed_tasks.append(task_id)
            task_ids.remove(task_id)
            print(f"Task {task_id} failed. {len(task_ids)} tasks remaining.")

    # Only wait when there is still something to poll.
    if task_ids:
        time.sleep(120) # wait 2 min 

Task LNL2J2SD5O6FBBKQIV6CBVIM failed. 24 tasks remaining.
Task PM5O7SYFSOKWHLFMSA7CNTNE failed. 23 tasks remaining.
Task M72Z7XWNTE2ZSKJKFNI7OXQB failed. 22 tasks remaining.
Task 3DQG5AGD3K2J5RGJN4EF72BK failed. 21 tasks remaining.
Task JJ5MXXWW7WMAEJMQQI6WGINJ failed. 20 tasks remaining.
Task 6O46QDNL6T3OECXRKV5Q2MWV failed. 19 tasks remaining.
Task VX26X3FH2FTFXXELKJAIL26W failed. 18 tasks remaining.
Task X253AFFTGMGD6F5GDXI444EA failed. 17 tasks remaining.
Task N5UTPO3MSJ5OW62LPY2LVXWI failed. 16 tasks remaining.
Task CS5EN6MBKWXYGR3DBOD2R5QN failed. 15 tasks remaining.
Task 6TUFDPK6O6NDETPLMF4KD3E2 failed. 14 tasks remaining.


In [None]:
# task_ids holds the tasks not yet removed by the polling loop above.
# NOTE(review): tasks that actually failed are collected in failed_tasks -
# confirm which list this summary is meant to count.
print(len(task_ids), "tasks failed to complete.")

In [None]:
# Show the first remaining task id; raises IndexError when task_ids is empty.
print(task_ids[0])