## Import Packages

In [9]:
import sys
import os

import fiona
import geopandas as gpd
import rioxarray
from pathlib import Path
from shapely.geometry import mapping

In [10]:
# Make the project's local source folder importable: resolve "../src" against
# the current working directory and register it on the module search path.
rel_path = "../src"
directory_path = os.path.abspath(os.path.join(os.getcwd(), rel_path))
# Guard the append so that re-running this cell in the same kernel does not
# keep adding duplicate entries to sys.path.
if directory_path not in sys.path:
    sys.path.append(directory_path)

from nex_gddp_cmip6 import get_nex_dataset, TIME_OPTIMIZED_SCENARIOS, AVAILABLE_VARIABLES

## Define Variables

In [2]:
# Base data folder; load_gdf reads from its 'processed' subfolder.
folderpath = "../data"
# File name handed to load_gdf together with folderpath.
filename = "gdf_easternmountain_polygons"
# NOTE(review): not referenced by any cell shown here — presumably a seed kept
# for later steps; confirm it is still needed.
random_state = 42

## Define Functions

In [5]:
def load_gdf(folderpath, filename):
    """
    Read a file from the 'processed' subfolder into a GeoDataFrame.

    Parameters:
    - folderpath (str): Base folder that holds the 'processed' subfolder.
    - filename (str): Name of the file inside 'processed' to read.

    Returns:
    - GeoDataFrame: Whatever geopandas.read_file returns for
      <folderpath>/processed/<filename>.
    """
    # Let pathlib join the three parts with the platform's separator
    processed_file = Path(folderpath, 'processed', filename)

    # geopandas is handed the location as a plain string
    return gpd.read_file(str(processed_file))

In [7]:
def clip_dataset(dataset, geodataframe, dataset_crs="EPSG:4326"):
    """
    Clip a dataset by a GeoDataFrame's boundaries.

    Parameters:
    - dataset: The dataset to be clipped. It must expose the rioxarray
      `.rio` accessor (rioxarray has to be imported).
    - geodataframe: A GeoDataFrame that defines the region to clip. Its
      geometries and its `crs` attribute are both passed to the clip call.
    - dataset_crs: CRS written onto the dataset before clipping. Defaults to
      "EPSG:4326", the value this function previously hard-coded, so existing
      two-argument calls behave exactly as before.

    Returns:
    - The clipped dataset.
    """
    # write_crs only tags the dataset with a CRS (it does not reproject), so
    # dataset_crs must describe the coordinates the data already uses.
    rio_dataset = dataset.rio.write_crs(dataset_crs)

    # Convert every geometry to a GeoJSON-like mapping for the clip call.
    shapes = geodataframe.geometry.apply(mapping)

    # The geometries' own CRS is passed explicitly rather than assumed to
    # match the dataset's.
    return rio_dataset.rio.clip(shapes, geodataframe.crs)

## Execute Functions

In [11]:
# Load datasets from S3
# NOTE(review): get_nex_dataset and AVAILABLE_VARIABLES come from the local
# nex_gddp_cmip6 module imported above; presumably the first argument selects
# the variables and the second the period to load — confirm in that module.
ds_historical = get_nex_dataset(AVAILABLE_VARIABLES,["historical"])
ds_projection = get_nex_dataset(AVAILABLE_VARIABLES,["projection"])

In [15]:
# Load GeoDataFrame
# load_gdf reads <folderpath>/processed/<filename>.
gdf = load_gdf(folderpath, filename)

# Clip datasets
# Both datasets are clipped against the same geometries, so the historical
# and projection results cover the same region.
ds_historical_clipped = clip_dataset(ds_historical, gdf)
ds_projection_clipped = clip_dataset(ds_projection, gdf)

In [16]:
# Display the clipped historical dataset to inspect its variables and shapes
ds_historical_clipped

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 860.36 MiB 271.69 MiB Shape (20, 1, 23741, 19, 25) (20, 1, 23741, 10, 15) Dask graph 6 chunks in 7 graph layers Data type float32 numpy.ndarray",1  20  25  19  23741,

Unnamed: 0,Array,Chunk
Bytes,860.36 MiB,271.69 MiB
Shape,"(20, 1, 23741, 19, 25)","(20, 1, 23741, 10, 15)"
Dask graph,6 chunks in 7 graph layers,6 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [17]:
# Display the clipped projection dataset to inspect its variables and shapes
ds_projection_clipped

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.45 GiB 575.15 MiB Shape (20, 4, 31411, 19, 25) (8, 4, 31411, 10, 15) Dask graph 18 chunks in 7 graph layers Data type float32 numpy.ndarray",4  20  25  19  31411,

Unnamed: 0,Array,Chunk
Bytes,4.45 GiB,575.15 MiB
Shape,"(20, 4, 31411, 19, 25)","(8, 4, 31411, 10, 15)"
Dask graph,18 chunks in 7 graph layers,18 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [19]:
# Calculate share of null values to confirm that the data is masked
# The denominator is the DataArray's total element count (.size) rather than a
# hand-written product of lat, lon, time and model lengths. The arrays shown
# above are 5-D, so that product is only right while the remaining dimension
# has length 1 (as in the historical data); for the projection dataset, where
# that dimension has length 4, it would overstate the share by a factor of 4.
tasmin_historical = ds_historical_clipped['tasmin']
tasmin_historical.isnull().sum().values / tasmin_historical.size

0.7646286526306921