## 2023 Open Science Data Challenge - Sentinel-2 Phenology with Cloud Filtering

This notebook calculates vegetation phenology from Sentinel-2 data, with cloud filtering applied per pixel. A pixel-based approach is better than a scene-based approach because cloud cover varies almost randomly with time and location, so discarding whole scenes would throw away many usable observations. To characterize phenology, the algorithm uses the Normalized Difference Vegetation Index (NDVI), which is a common proxy for vegetation growth and health. The outputs of this notebook can be used to assess differences between agricultural fields over time or space, and they also allow the assessment of growing stages such as planting and harvesting. The baseline data is [Sentinel-2 Level-2A](https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a) data from the Microsoft Planetary Computer catalog.

In [1]:
# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Import common GIS tools
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import rasterio.features
import rioxarray as rio
import xrspatial.multispectral as ms

# Import Planetary Computer tools
import pystac_client
import planetary_computer as pc
import odc
from odc.stac import stac_load
from odc.algo import to_rgba

from tqdm import tqdm

In [2]:
# Load the labelled sample points. As the displayed table shows, each row holds
# a "(latitude, longitude)" string and a land class ("Rice" / "Non Rice").
crop_presence_data = pd.read_csv("Crop_Location_Data_20221201_202_to_402.csv")
# Bare last expression: render the frame as the cell output.
crop_presence_data

Unnamed: 0,Latitude and Longitude,Class of Land
0,"(9.953530550908601, 105.52871419067279)",Rice
1,"(9.973062390583982, 105.5209923005689)",Rice
2,"(9.990323086111061, 105.51054503748719)",Rice
3,"(9.954893237397581, 105.52508036003567)",Rice
4,"(9.969428559946701, 105.50191468972403)",Rice
...,...,...
196,"(10.472078182848456, 104.91241651461723)",Non Rice
197,"(10.471623954018797, 104.91241651461723)",Non Rice
198,"(10.471169725189137, 104.91241651461723)",Non Rice
199,"(10.470715496359476, 104.91241651461723)",Non Rice


In [3]:
# --- Per-point NDVI from cloud-filtered Sentinel-2 L2A data ------------------
# For every sample location: load the red / nir / SCL bands for the time
# window, discard unusable observations using the scene classification layer
# (SCL), compute NDVI = (nir - red) / (nir + red) per acquisition, and append
# the mean over all usable acquisitions to NDVI_values.
# NDVI_values ends up with exactly one entry per row of crop_presence_data
# (NaN when a location has no usable observation), so it can be assigned
# directly as a new column.

# Query settings do not depend on the sample point, so define them once.
time_window = "2021-12-01/2022-01-31"
resolution = 20  # meters per pixel
scale = resolution / 111320.0  # degrees per pixel for CRS:4326 (~111,320 m per degree)

# SCL classes that must not contribute to NDVI:
#   0 no data, 1 saturated/defective, 3 cloud shadow, 6 water,
#   8 cloud (medium probability), 9 cloud (high probability), 10 thin cirrus
masked_scl_classes = [0, 1, 3, 6, 8, 9, 10]

# Open the STAC catalog once instead of once per sample point.
stac = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")

NDVI_values = []
for coordinates in tqdm(crop_presence_data['Latitude and Longitude']):
    # Parse the "(lat, lon)" string into two floats.
    lat_long = coordinates.replace('(', '').replace(')', '').replace(' ', '').split(',')
    lat = float(lat_long[0])
    lon = float(lat_long[1])

    # Zero-area bounding box: only the pixel containing the point is sampled.
    bounds = (lon, lat, lon, lat)

    search = stac.search(collections=["sentinel-2-l2a"], bbox=bounds, datetime=time_window)
    items = list(search.get_all_items())
    if not items:
        # No scene covers this point in the time window. Record NaN so the
        # list stays aligned with the rows of crop_presence_data.
        NDVI_values.append(np.nan)
        continue

    xx = stac_load(
        items,
        bands=["red", "nir", "SCL"],
        crs="EPSG:4326",  # Latitude-Longitude
        resolution=scale,  # Degrees
        chunks={"x": 2048, "y": 2048},
        dtype="uint16",
        patch_url=pc.sign,
        bbox=bounds,
    )

    # True where the observation is usable (its SCL class is not masked).
    cloud_mask = ~xx.SCL.isin(masked_scl_classes)

    # Keep the masked data as float: `where` marks rejected pixels as NaN, and
    # casting that back to uint16 would turn them into bogus integer values
    # that contaminate the spatial mean. float64 matches the precision the
    # downstream arithmetic used before.
    cleaned_data = xx[["red", "nir"]].astype("float64").where(cloud_mask)

    # Spatial mean per acquisition (NaN pixels are skipped), then NDVI per
    # acquisition. Fully masked acquisitions stay NaN.
    mean_clean = cleaned_data.mean(dim=['longitude', 'latitude']).compute()
    ndvi_mean_clean = (mean_clean.nir - mean_clean.red) / (mean_clean.nir + mean_clean.red)

    # Average over the acquisitions that produced a valid NDVI; the result is
    # NaN when every acquisition was masked.
    NDVI_values.append(np.nanmean(np.asarray(ndvi_mean_clean.values)))
    

100%|████████████████████████████████████████████████████████████████████████████████| 201/201 [15:34<00:00,  4.65s/it]


In [4]:
# Inspect the per-location NDVI values collected by the loop above.
NDVI_values

[0.7439078498412914,
 0.6232555298261847,
 0.7157818571613527,
 0.7494427769005372,
 0.8031155664401299,
 0.7309990706018542,
 0.7723164581801645,
 0.7346236881767965,
 0.7664249293751204,
 0.8275221784801332,
 0.8431196007333562,
 0.6757842869309505,
 0.8028796373040625,
 0.855991294237948,
 0.7782210522750989,
 0.8527205737045391,
 0.6723777574855597,
 0.7873366516209577,
 0.8085284404514591,
 0.7426253652370803,
 0.7219132751202674,
 0.7297716961397958,
 0.6743242297479661,
 0.8821882844507637,
 0.6963104144932845,
 0.6173150303580378,
 0.7923899400244707,
 0.8805139455165442,
 0.8065004009921248,
 0.7238388641252193,
 0.7996214844035774,
 0.6662625409173849,
 0.76978488043466,
 0.7939704811151485,
 0.7318068502890668,
 0.6607410380018872,
 0.7094161388082152,
 0.8604435900615989,
 0.8207278638519259,
 0.7759707219096278,
 0.7761121470020551,
 0.7703409968916262,
 0.7475192559136583,
 0.6638875187356885,
 0.7083936899946736,
 0.7401573710296506,
 0.7428189055189969,
 0.8635022321560

In [5]:
# NOTE(review): this cell repeats the previous one and prints the same list;
# consider deleting it to keep the notebook output compact.
NDVI_values

[0.7439078498412914,
 0.6232555298261847,
 0.7157818571613527,
 0.7494427769005372,
 0.8031155664401299,
 0.7309990706018542,
 0.7723164581801645,
 0.7346236881767965,
 0.7664249293751204,
 0.8275221784801332,
 0.8431196007333562,
 0.6757842869309505,
 0.8028796373040625,
 0.855991294237948,
 0.7782210522750989,
 0.8527205737045391,
 0.6723777574855597,
 0.7873366516209577,
 0.8085284404514591,
 0.7426253652370803,
 0.7219132751202674,
 0.7297716961397958,
 0.6743242297479661,
 0.8821882844507637,
 0.6963104144932845,
 0.6173150303580378,
 0.7923899400244707,
 0.8805139455165442,
 0.8065004009921248,
 0.7238388641252193,
 0.7996214844035774,
 0.6662625409173849,
 0.76978488043466,
 0.7939704811151485,
 0.7318068502890668,
 0.6607410380018872,
 0.7094161388082152,
 0.8604435900615989,
 0.8207278638519259,
 0.7759707219096278,
 0.7761121470020551,
 0.7703409968916262,
 0.7475192559136583,
 0.6638875187356885,
 0.7083936899946736,
 0.7401573710296506,
 0.7428189055189969,
 0.8635022321560

In [6]:
# Attach the computed NDVI values as a new "NDVI" column (one value per row).
crop_presence_data = crop_presence_data.assign(NDVI=NDVI_values)

In [7]:
# Display the table again to confirm the NDVI column is present.
crop_presence_data

Unnamed: 0,Latitude and Longitude,Class of Land,NDVI
0,"(9.953530550908601, 105.52871419067279)",Rice,0.743908
1,"(9.973062390583982, 105.5209923005689)",Rice,0.623256
2,"(9.990323086111061, 105.51054503748719)",Rice,0.715782
3,"(9.954893237397581, 105.52508036003567)",Rice,0.749443
4,"(9.969428559946701, 105.50191468972403)",Rice,0.803116
...,...,...,...
196,"(10.472078182848456, 104.91241651461723)",Non Rice,0.754261
197,"(10.471623954018797, 104.91241651461723)",Non Rice,0.716282
198,"(10.471169725189137, 104.91241651461723)",Non Rice,0.850926
199,"(10.470715496359476, 104.91241651461723)",Non Rice,0.757455


In [8]:
# Persist the table including the NDVI column; index=False keeps the
# DataFrame's row index out of the written file.
crop_presence_data.to_csv('lat_lon1_202to402_ndvi.csv', index=False)