In [1]:
# Basic plots
%matplotlib inline
import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = [12, 8]

# Common imports and settings
import os, sys
os.environ['USE_PYGEOS'] = '0'
from IPython.display import Markdown
import pandas as pd
pd.set_option("display.max_rows", None)
import xarray as xr

# Datacube
import datacube
from datacube.utils.rio import configure_s3_access
from datacube.utils import masking
from datacube.utils.cog import write_cog
# https://github.com/GeoscienceAustralia/dea-notebooks/tree/develop/Tools
from dea_tools.plotting import display_map, rgb
from dea_tools.datahandling import mostcommon_crs

# EASI defaults
easinotebooksrepo = '/home/jovyan/easi-notebooks'
if easinotebooksrepo not in sys.path: sys.path.append(easinotebooksrepo)
from easi_tools import EasiDefaults, xarray_object_size, notebook_utils

In [2]:
# Data tools
import numpy as np
from datetime import datetime

# Datacube
from datacube.utils import masking  # https://github.com/opendatacube/datacube-core/blob/develop/datacube/utils/masking.py
from odc.algo import enum_to_bool   # https://github.com/opendatacube/odc-tools/blob/develop/libs/algo/odc/algo/_masking.py
from odc.algo import xr_reproject   # https://github.com/opendatacube/odc-tools/blob/develop/libs/algo/odc/algo/_warp.py
from datacube.utils.geometry import GeoBox, box  # https://github.com/opendatacube/datacube-core/blob/develop/datacube/utils/geometry/_base.py

# Holoviews, Datashader and Bokeh
import hvplot.pandas
import hvplot.xarray
import holoviews as hv
import panel as pn
import colorcet as cc
import cartopy.crs as ccrs
from datashader import reductions
from holoviews import opts
# import geoviews as gv
# from holoviews.operation.datashader import rasterize
hv.extension('bokeh', logo=False)

# Dask
from dask.distributed import Client, LocalCluster

In [3]:
easi = EasiDefaults()

family = 'sentinel-2'
product = easi.product(family)
display(Markdown(f'Default {family} product for "{easi.name}": [{product}]({easi.explorer}/products/{product})'))

Successfully found configuration for deployment "asia"


Default sentinel-2 product for "asia": [s2_l2a](https://explorer.asia.easi-eo.solutions/products/s2_l2a)

In [4]:
# Local cluster
# Default is to run on a compute node with 28 GiB of available memory and 8 cores.
# We'll make that explicit here .. but this should be adjusted based on your workflow

cluster = LocalCluster(n_workers=2, threads_per_worker=4)
# cluster.scale(n=2, memory="14GiB")
cluster.scale(n=4, memory="6GiB")
client = Client(cluster)
display(client)

dashboard_address = notebook_utils.localcluster_dashboard(client=client,server=easi.hub)
display(dashboard_address)

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 16,Total memory: 248.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:44921,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 16
Started: Just now,Total memory: 248.00 GiB

0,1
Comm: tcp://127.0.0.1:43371,Total threads: 4
Dashboard: http://127.0.0.1:43661/status,Memory: 62.00 GiB
Nanny: tcp://127.0.0.1:35467,
Local directory: /tmp/dask-scratch-space/worker-dfv0eilo,Local directory: /tmp/dask-scratch-space/worker-dfv0eilo

0,1
Comm: tcp://127.0.0.1:43885,Total threads: 4
Dashboard: http://127.0.0.1:39933/status,Memory: 62.00 GiB
Nanny: tcp://127.0.0.1:41917,
Local directory: /tmp/dask-scratch-space/worker-mkpj9qdz,Local directory: /tmp/dask-scratch-space/worker-mkpj9qdz

0,1
Comm: tcp://127.0.0.1:33653,Total threads: 4
Dashboard: http://127.0.0.1:45741/status,Memory: 62.00 GiB
Nanny: tcp://127.0.0.1:44455,
Local directory: /tmp/dask-scratch-space/worker-oiimis98,Local directory: /tmp/dask-scratch-space/worker-oiimis98

0,1
Comm: tcp://127.0.0.1:41073,Total threads: 4
Dashboard: http://127.0.0.1:32971/status,Memory: 62.00 GiB
Nanny: tcp://127.0.0.1:34477,
Local directory: /tmp/dask-scratch-space/worker-exrtoamd,Local directory: /tmp/dask-scratch-space/worker-exrtoamd


'https://hub.asia.easi-eo.solutions/user/dhnghia/proxy/8787/status'

In [5]:
dc = datacube.Datacube()

# Access AWS "requester-pays" buckets
# This is necessary for reading data from most third-party AWS S3 buckets such as for Landsat and Sentinel-2
configure_s3_access(aws_unsigned=False, requester_pays=True, client=client);

In [6]:
from utils import load_data_geo
import geopandas as gpd
from deafrica_tools.areaofinterest import define_area
from datacube.utils.geometry import Geometry
import xarray as xr
train_path = "train/Soc Trang_Traning.shp"
train = load_data_geo(train_path)
train.head()

Unnamed: 0,Name,geometry
0,Water1,POINT (620223.628 1072681.101)
1,Water1,POINT (621057.521 1071074.602)
2,Water1,POINT (621891.602 1069430.621)
3,Water1,POINT (622762.564 1068048.689)
4,Water1,POINT (623256.484 1066777.765)


In [162]:
train1 = train.to_crs('EPSG:4326')

In [9]:
train1.head().explore(column="Name", legend=True)

In [None]:
# Proposed workflow
# 1. get bounding polygon for all training data points
# 2. dc.load with dask for bounding polygon (and all times when you're ready to try that)
#     - consider also remapping S2 data to lat/lon projection (e.g., epsg:4326) - may not be necessary
# 2a. apply S2 masking, scale, offset
# 3. calculate NDVI (still in dask so its a "virtual" on-demand calculation)
# 3a. use xarray.persist() to pre-calculate NDVI for all pixels in our bounding polygon
#     - more efficient to read and process all pixels than process each training point
# 4. for idx, point in train.iterrows():
#     -  get points from xarray (dask)
#        need to convert point lat/lon to S2 UTM or dc.load into epsg:4326
#        xarray data in S2 UTM project (output_crs, resolution)
#        point data in epsg:4326 (train.crs)
#     -  Store the loaded point data in the dictionary with a key based on the point index

In [203]:
from deafrica_tools.bandindices import calculate_indices
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Vietnam
min_longitude, max_longitude = (105.5, 106.4)
min_latitude, max_latitude = (9.2, 10.0)
min_date = '2021-12-01' # 2021-11-01
max_date = '2022-01-01' # 2022-01-01
product = 's2_l2a'

query1 = {
    'product': product,                     # Product name
    'x': (min_longitude, max_longitude),    # "x" axis bounds
    'y': (min_latitude, max_latitude),      # "y" axis bounds
    'time': (min_date, max_date),           # Any parsable date strings
}

# Most common CRS
native_crs = notebook_utils.mostcommon_crs(dc, query1)

query1.update({
    'output_crs': native_crs,               # EPSG code
    'resolution': (-10, 10),                # Target resolution
    'group_by': 'solar_day',                # Scene ordering
    'dask_chunks': {'x': 2048, 'y': 2048},  # Dask chunks
})


In [8]:
# Load data
data = dc.load(**query1)

notebook_utils.heading(notebook_utils.xarray_object_size(data))
display(data)

# Calculate valid (not nodata) masks for each layer
valid_mask = masking.valid_data_mask(data)
notebook_utils.heading('Valid data masks for each variable')
display(valid_mask)

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint8 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 6.06 GiB 8.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type uint16 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,6.06 GiB,8.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 3.03 GiB 4.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 5 graph layers Data type bool numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,3.03 GiB,4.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 5 graph layers,925 chunks in 5 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray


In [204]:
data1 = dc.load(**query1)

In [9]:
# Measurement definitions for the selected product
measurement_info = dc.list_measurements().loc[query1['product']]
notebook_utils.heading(f'Measurement table for product: {query1["product"]}')
display(measurement_info)

# Separate lists of measurement names and flag names
measurement_names = measurement_info[pd.isnull(measurement_info.flags_definition)].index
flag_names        = measurement_info[pd.notnull(measurement_info.flags_definition)].index

notebook_utils.heading('Selected Measurement and Flag names')
display(pd.DataFrame({
    'group': ['Measurement names', 'Flag names'],
    'names': [', '.join(measurement_names), ', '.join(flag_names)]
}))

# Flag definitions
for flag in flag_names:
    notebook_utils.heading(f'Flag definition table for flag name: {flag}')
    display(masking.describe_variable_flags(data[flag]))

Unnamed: 0_level_0,name,dtype,units,nodata,flags_definition,aliases,add_offset,scale_factor
measurement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
coastal,coastal,uint16,1,0,,"[band_01, B01, coastal_aerosol]",-0.1,0.0001
blue,blue,uint16,1,0,,"[band_02, B02]",-0.1,0.0001
green,green,uint16,1,0,,"[band_03, B03]",-0.1,0.0001
red,red,uint16,1,0,,"[band_04, B04]",-0.1,0.0001
rededge1,rededge1,uint16,1,0,,"[band_05, B05, red_edge_1]",-0.1,0.0001
rededge2,rededge2,uint16,1,0,,"[band_06, B06, red_edge_2]",-0.1,0.0001
rededge3,rededge3,uint16,1,0,,"[band_07, B07, red_edge_3]",-0.1,0.0001
nir,nir,uint16,1,0,,"[band_08, B08, nir_1]",-0.1,0.0001
nir08,nir08,uint16,1,0,,"[band_8a, B8A, nir_2]",-0.1,0.0001
nir09,nir09,uint16,1,0,,"[band_09, B09, nir_3]",-0.1,0.0001


Unnamed: 0,group,names
0,Measurement names,"coastal, blue, green, red, rededge1, rededge2,..."
1,Flag names,scl


Unnamed: 0,bits,values,description
qa,"[0, 1, 2, 3, 4, 5, 6, 7]","{'0': 'no data', '1': 'saturated or defective'...",Sen2Cor Scene Classification


In [10]:
flags_def = masking.describe_variable_flags(data[flag]).values
flags_def = flags_def.tolist()[0][1]
# Make SCL flags image
flag_name = 'scl'
flag_data = data[[flag_name]].where(valid_mask[flag_name]).persist()   # Dataset
display(flag_data)
# Create Mask layer

good_pixel_flags = [flags_def[str(i)] for i in [4, 5, 6]]

good_pixel_mask = enum_to_bool(data[flag_name], good_pixel_flags)  # -> DataArray
# display(good_pixel_mask)  # Type: bool

Unnamed: 0,Array,Chunk
Bytes,12.11 GiB,16.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 12.11 GiB 16.00 MiB Shape (37, 8874, 9902) (1, 2048, 2048) Dask graph 925 chunks in 1 graph layer Data type float32 numpy.ndarray",9902  8874  37,

Unnamed: 0,Array,Chunk
Bytes,12.11 GiB,16.00 MiB
Shape,"(37, 8874, 9902)","(1, 2048, 2048)"
Dask graph,925 chunks in 1 graph layer,925 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [11]:
# Select a layer and apply masking and scaling, then persist in dask
# layer_name = 'red'
rs = []
for layer_name in ['red', 'green', 'blue', 'nir']:

    # Get scaling and offset values from product description
    scale = measurement_info.loc[layer_name].scale_factor
    offset = measurement_info.loc[layer_name].add_offset

    # Apply valid mask and good pixel mask
    layer = data[[layer_name]].where(valid_mask[layer_name] & good_pixel_mask) * scale + offset
    layer = layer.persist()
    rs.append(layer)

  _reproject(
  _reproject(


In [12]:
import xarray as xr
result = rs[0].merge(rs[1])
result = result.merge(rs[2])
result = result.merge(rs[3])

In [13]:
ds1 = calculate_indices(result, index='NDVI', satellite_mission='s2')
ndvi = ds1["NDVI"]
average_ndvi = ndvi.resample(time='1M').mean() ## tính mean cho từng tháng -> time = 12

In [None]:
from deafrica_tools.plotting import rgb
rgb(data1, col="time")

In [193]:
red = data1.isel(x=0, y=0).red.values[-1]
nir = data1.isel(x=0, y=0).nir.values[-1]

In [195]:
(nir - red)/ (nir + red)

0.9581100828056502

In [196]:
data1.isel(x=0, y=0).red.values

array([ 6496,     0,  4608,     0,  3194,     0,  3432,     0,  7572,
           0,  5016,     0,  1318,     0,  1532,     0, 11360,     0,
       10504,     0,   913,     0,   534,     0,  7736,     0,   280,
           0,  3134,     0,  1450,     0,   210,     0,  5424,     0,
          86], dtype=uint16)

In [192]:
result.isel(x=0, y=0).red.values

array([        nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan,  0.03179999,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan, -0.072     ,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,  0.4424    ,
               nan, -0.0914    ], dtype=float32)

In [181]:
red = result.isel(x=0, y=0).red.values[-1]
nir = result.isel(x=0, y=0).nir.values[-1]

In [194]:
(nir - red)/ (nir + red)

0.9581100828056502

In [176]:
ndvi.isel(x=0, y=0).values

array([        nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan, -0.76666677,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,  4.356643  ,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,  0.1968046 ,
               nan,  1.8679965 ], dtype=float32)

In [None]:
average_ndvi.plot(col='time',
           cmap='RdYlGn',
           size=6,
col_wrap=2)

In [156]:
loaded_datasets = {}
for idx, point in train.iterrows():
    key = f"point_{idx + 1}"
    try:
        loaded_datasets[key] = {
            "NDVI": average_ndvi.sel(x=point.geometry.x, y=point.geometry.y, method='nearest').values,
            "label": point.Name
                               }
    except Exception as e:
        # loaded_datasets[key] = None
        print(key)

point_477
point_478


In [None]:
loaded_datasets

In [None]:
## tiền xử lý data: fill nan, remove 

In [17]:
label_encoder = LabelEncoder()

# Fit and transform the labels
labels = train.Name.values
numeric_labels = label_encoder.fit_transform(labels)
label_mapping = dict(zip(labels, numeric_labels))

In [None]:
loaded_datasets

In [29]:
X = []
for k, v in loaded_datasets.items():
    X.append(v)

In [72]:
x_new = []
lb_new = []
for i in range(len(X)):
    if X[i] is not None:
        x_new.append(X[i])
        lb_new.append(numeric_labels[i])

In [38]:
df = pd.DataFrame(x_new)

In [73]:
column_means = np.nanmean(x_new, axis=0)
column_means_expanded = np.tile(column_means, (len(x_new), 1))

In [74]:
x_final = np.where(np.isnan(x_new), column_means_expanded, x_new)

In [175]:
x_final

array([[ 0.8304264 , -5.5423803 , -0.2822286 ],
       [ 0.8304264 , -1.685553  ,  1.7087475 ],
       [ 0.8304264 ,  0.84339976, 11.006107  ],
       ...,
       [ 0.8304264 ,  0.84339976,  1.0357237 ],
       [ 0.8304264 ,  0.84339976,  0.7418415 ],
       [ 0.8304264 ,  0.84339976,  1.8329847 ]], dtype=float32)

In [77]:
X_train, X_test, y_train, y_test = train_test_split(x_final, lb_new, test_size=0.3, random_state=42)


In [135]:
model = RandomForestClassifier(n_estimators=1000, random_state=42, criterion='gini', max_depth=10)
model.fit(X_train, y_train)

In [136]:
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')

Accuracy: 0.4031746031746032


In [139]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Tạo RandomForestClassifier mặc định để sử dụng làm mô hình ban đầu trong pipeline
base_model = RandomForestClassifier(random_state=42)

# Tạo pipeline
pipeline = Pipeline([
    # ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('classifier', base_model),
])

# Thiết lập các tham số bạn muốn tối ưu hóa
param_grid = {
    'classifier__n_estimators': [100, 500, 1000],
    'classifier__max_depth': [5, 10, 20],
    'classifier__criterion': ['gini', 'entropy'],
}

# Sử dụng GridSearchCV để tìm bộ tham số tốt nhất
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# In ra bộ tham số tốt nhất
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Dự đoán trên tập kiểm tra
y_pred = grid_search.predict(X_test)

# Đánh giá kết quả
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Best Parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': 5, 'classifier__n_estimators': 500}
Accuracy: 0.35555555555555557


In [169]:
avg = average_ndvi.persist()

In [166]:
avg1 = avg.fillna(avg.mean(dim='x'))

In [155]:
grid_search.predict([avg1.isel(y=0, x=0).values])

array([4])