In [1]:
import dask
import ee
import geemap
import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
from dask import compute, delayed
from dask.distributed import Client
from tqdm.notebook import tqdm

In [2]:
ee.Authenticate()

True

In [3]:
ee.Initialize()

In [4]:
start_date = "2000-01-01"
end_date = "2000-02-28"

In [5]:
spain_map = ee.FeatureCollection("FAO/GAUL/2015/level0").filter(
    ee.Filter.eq("ADM0_NAME", "Spain")
)

In [6]:
spain_provinces = gpd.read_file("../data/maps/spain_map.geojson")

In [7]:
spain_provinces = spain_provinces[
    spain_provinces["community"]
    != "Gibraltar. Territorio no asociado a ninguna autonomía"
]

In [8]:
terra_climate_dataset = (
    ee.ImageCollection("IDAHO_EPSCOR/TERRACLIMATE")
    .filterBounds(spain_map)
    .filterDate(start_date, end_date)
)

In [9]:
def get_monthly_data(feature):
    province_name = feature["properties"]["province"]
    province_geometry = ee.Geometry(feature["geometry"])

    def spatial_aggregation(image):
        bands = image.select(["pr", "pet"])

        # Apply reduceRegion once to get both mean values
        mean_values_dict = bands.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=province_geometry,
            scale=4638.3,
        )

        date = image.date().format("YYYY-MM-dd")

        feature = ee.Feature(
            None,
            {
                "province": province_name,
                "date": date,
                "avg_precipitation": mean_values_dict.get("pr"),
                "avg_pet": mean_values_dict.get("pet"),
            },
        )

        return feature

    return terra_climate_dataset.map(spatial_aggregation)

In [10]:
def get_data_as_dataframe(feature):
    ee.Initialize()
    monthly_data = get_monthly_data(feature).getInfo()
    df = pd.DataFrame([r["properties"] for r in monthly_data["features"]])
    return df

# Dask stuff:

In [11]:
features_list = list(spain_provinces.iterfeatures())

In [12]:
client = Client()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 52730 instead


In [13]:
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:52730/status,

0,1
Dashboard: http://127.0.0.1:52730/status,Workers: 4
Total threads: 16,Total memory: 15.71 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:52731,Workers: 4
Dashboard: http://127.0.0.1:52730/status,Total threads: 16
Started: Just now,Total memory: 15.71 GiB

0,1
Comm: tcp://127.0.0.1:52757,Total threads: 4
Dashboard: http://127.0.0.1:52758/status,Memory: 3.93 GiB
Nanny: tcp://127.0.0.1:52734,
Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-xgdc67na,Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-xgdc67na

0,1
Comm: tcp://127.0.0.1:52751,Total threads: 4
Dashboard: http://127.0.0.1:52753/status,Memory: 3.93 GiB
Nanny: tcp://127.0.0.1:52735,
Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-wrf2dy23,Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-wrf2dy23

0,1
Comm: tcp://127.0.0.1:52750,Total threads: 4
Dashboard: http://127.0.0.1:52752/status,Memory: 3.93 GiB
Nanny: tcp://127.0.0.1:52736,
Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-ut1jcn3p,Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-ut1jcn3p

0,1
Comm: tcp://127.0.0.1:52756,Total threads: 4
Dashboard: http://127.0.0.1:52759/status,Memory: 3.93 GiB
Nanny: tcp://127.0.0.1:52737,
Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-vs4rfof5,Local directory: C:\Users\RESLAN~1\AppData\Local\Temp\dask-scratch-space\worker-vs4rfof5


In [14]:
delayed_results = [delayed(get_data_as_dataframe)(feature) for feature in features_list]

In [15]:
results = compute(*delayed_results)

This may cause some slowdown.
Consider scattering data ahead of time and using futures.


In [None]:
# client.close()

In [17]:
pd.concat(results)

Unnamed: 0,avg_pet,avg_precipitation,date,province
0,142.446771,22.252368,2000-01-01,Araba/Álava
1,350.056457,23.672784,2000-02-01,Araba/Álava
0,280.206665,21.396714,2000-01-01,Albacete
1,511.694386,0.332510,2000-02-01,Albacete
0,351.700535,23.740549,2000-01-01,Alacant/Alicante
...,...,...,...,...
1,598.714286,0.000000,2000-02-01,Territorios no asociados a ninguna provincia
0,717.599079,48.837677,2000-01-01,Las Palmas
1,919.577949,18.840789,2000-02-01,Las Palmas
0,702.556022,72.590353,2000-01-01,Santa Cruz de Tenerife
