## View example downloaded EMIT agricultural scenes

We will use the image granules that you orthorectified in the previous step **"1_Orthorectify_images.ipynb"**

### Step 1. Setup notebook

Import packages

In [None]:
import os, sys, fnmatch
import warnings
from osgeo import gdal
import numpy as np
import math
import rasterio as rio
import xarray as xr
import holoviews as hv
import hvplot.xarray
import pandas as pd
import hvplot.pandas
import geopandas as gpd
import rioxarray as rxr
import numpy as np
import folium
from branca.element import Figure

# Make the HYR-SENSE custom functions importable, then pull in the spectral
# index helpers used later in this notebook.
# (The sibling helper_functions module is deliberately not imported here.)
home_dir = os.path.expanduser("~")
functions_dir = os.path.join(home_dir, "HYR-SENSE", "tools", "functions")
sys.path.append(functions_dir)
from spectral_index import *

# The workflow and platform names select the EMIT image data folder
workflow = "Agriculture"
platform = "emit"

# Setup paths: GIS inputs, outputs, and the EMIT image granules
datadir = os.path.join(home_dir, 'HYR-SENSE/data/Agriculture/')
outdir = os.path.join(home_dir, 'HYR-SENSE/data/Agriculture/output')
source_file_path = os.path.join(home_dir, "HYR-SENSE", "data", workflow, platform)

# Silence warnings for the rest of the session. This is meant to hide the
# noisy holoviews warnings, but note that it suppresses ALL Python warnings.
warnings.simplefilter('ignore')

### Step 2. Find all downloaded and orthorectified agricultural images

In [None]:
# Report the folders this notebook reads from: the EMIT image folder (chosen
# by the workflow/platform settings) and the GIS data folder
print(f"*** EMIT data folder: {source_file_path}")
print("")
print(f"*** GIS data folder: {datadir}")
print("")

In [None]:
### Collect every file in the EMIT data folder whose name matches '*ortho.nc'
emit_dir_listing = os.listdir(source_file_path)
granules = [fname for fname in emit_dir_listing if fnmatch.fnmatch(fname, '*ortho.nc')]
print("*** EMIT Data Granules ***")
granules

### Step 3. Select and load a previously orthorectified EMIT image

First, let's remind ourselves where we currently have EMIT data ready to be analyzed. We can do this by re-plotting the EMIT bounding boxes that we saved in the data search and download step.

In [None]:
### Load the EMIT granule bounding boxes from the GIS data folder
footprints_path = os.path.join(datadir, 'emit_granule_footprints.gpkg')
gdf_all = gpd.read_file(footprints_path)

# Keep a separate copy holding only the Yuma, CO granule footprint
is_yuma = gdf_all['meta.native-id'] == 'EMIT_L2A_RFL_001_20230729T205630_2321014_019'
gdf_yuma = gdf_all.loc[is_yuma].to_crs(5070)

# Reproject the full set of footprints to the same CRS (EPSG:5070)
gdf_all = gdf_all.to_crs(5070)

In [None]:
# Display the attribute table of all granule footprints loaded above
print("*** EMIT granule bounding boxes ***")
gdf_all

In [None]:
# Create the interactive map using folium, with a Google imagery basemap,
# wrapped in a fixed-size branca Figure so it renders compactly in the notebook
fig = Figure(width="750px", height="375px")
map1 = folium.Map(tiles='https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}', attr='Google')
fig.add_child(map1)

# Draw every granule footprint on the map, coloured by granule ID.
# The outline thickness option is `weight`; a `width` key is not a recognised
# path style option and would be silently ignored.
gdf_all.explore(
    "meta.native-id",
    categorical=True,
    tooltip=[
        "meta.native-id",
        "start_datetime",
    ],
    popup=True,
    style_kwds=dict(fillOpacity=0.1, weight=2),
    name="EMIT",
    m=map1,
    legend=False
)

# Zoom the map to the extent of everything that has been added to it
map1.fit_bounds(map1.get_bounds(), padding=(30, 30))
display(fig)

For now, let's select a single scene ID to explore the data more closely. We will choose the scene that covers Yuma Colorado, shown in blue below

In [None]:
### Show just the Yuma CO scene
# The footprint is added as another layer on the existing map (map1), drawn
# nearly opaque so it stands out. The outline thickness option is `weight`;
# a `width` key is not a recognised path style option and would be ignored.
gdf_yuma.explore(
    "meta.native-id",
    categorical=True,
    tooltip=[
        "meta.native-id",
        "start_datetime",
    ],
    popup=True,
    style_kwds=dict(fillOpacity=0.91, weight=5),
    name="EMIT",
    m=map1,
    legend=False
)

# Re-fit the view to the map contents and show the figure again
map1.fit_bounds(map1.get_bounds(), padding=(30, 30))
display(fig)

In [None]:
### Pick an image granule to explore - we will start with EMIT_L2A_RFL_001_20230729T205630_2321014_019_ortho.nc
# This is a file name only; it is joined with source_file_path when the image is opened
img_file = 'EMIT_L2A_RFL_001_20230729T205630_2321014_019_ortho.nc'

In [None]:
# Build the full path to the selected granule and open it as an xarray Dataset
img_file_dat = os.path.join(source_file_path,img_file)
ds_geo = xr.open_dataset(img_file_dat)

### Step 4. Quickly display the selected orthorectified image

Here we will view the selected orthorectified image that contains Yuma Colorado, shown below with the yellow dot

In [None]:
# Select the band nearest to 850 nm; reused for both the image and its title
refl850 = ds_geo.sel(wavelengths=850, method='nearest')

# Latitude/longitude of the point drawn as the yellow marker
yuma_df = pd.DataFrame({'Latitude': [40.1222], 'Longitude': [-102.7252]})

# Semi-transparent reflectance image drawn over ESRI tiles
img_title = f"Reflectance at {refl850.wavelengths.values:.3f} {refl850.wavelengths.units} (Orthorectified)"
img_plot = refl850.hvplot.image(
    cmap='Viridis', geo=True, tiles='ESRI', alpha=0.8, frame_height=600
).opts(title=img_title)

# Marker layer, overlaid on the image with the * operator
pt_plot = yuma_df.hvplot.points(
    "Longitude", "Latitude", geo=True, color="yellow", alpha=0.9, s=250, global_extent=False
)
plots = img_plot * pt_plot
plots

We can see that the orthorectification step placed the data on a geographic grid that matches pretty well with the ESRI tiles. Now that we have a better idea of what the target area looks like, we can also plot the spectra using the georeferenced data. 

We can also display three bands side-by-side that cover the visible through the near-infrared to look at the differences in surface reflectance at the three different wavelengths (EMIT band centers).

In [None]:
# Select the bands nearest to 550 nm and 650 nm (refl850 was selected in an earlier cell)
refl550 = ds_geo.sel(wavelengths=550, method='nearest')
refl650 = ds_geo.sel(wavelengths=650, method='nearest')

# Lay the three single-band images out side by side with the + operator
plots = (
    refl550.hvplot.image(cmap='viridis', aspect='equal', frame_width=500).opts(title="Band: 550")
    + refl650.hvplot.image(cmap='viridis', aspect='equal', frame_width=500).opts(title="Band: 650")
    + refl850.hvplot.image(cmap='viridis', aspect='equal', frame_width=500).opts(title="Band: 850")
)
plots

### Step 5. Plot example spectra

Now let's plot some example spectra found in the image. Before we do this, we should filter out the water absorption bands like we did earlier, by setting the reflectance to NaN along the third dimension of the array wherever good_wavelengths flags a band as bad.

In [None]:
# Bands flagged 0 in good_wavelengths are blanked out (set to NaN) in place,
# along the third axis of the reflectance array
bad_band_mask = ds_geo['good_wavelengths'].data == 0
ds_geo['reflectance'].data[:, :, bad_band_mask] = np.nan

Now let's select some random points from within the scene covering different crop types and crop stages from center-pivot irrigation (A), rainfed (B), and bare (C)

In [None]:
# Nearest EMIT pixel to each of the three example locations (A, B, C)
point1 = ds_geo.sel(longitude=-102.694, latitude=40.347, method='nearest')
point2 = ds_geo.sel(longitude=-102.957, latitude=40.160, method='nearest')
point3 = ds_geo.sel(longitude=-102.516, latitude=40.428, method='nearest')

# One reflectance-vs-wavelength panel per point, titled with the coordinates
# of the pixel that was actually selected, laid out side by side
spectra_plots = (
    point1.hvplot.line(
        y='reflectance', x='wavelengths', color='black', frame_height=400, frame_width=440
    ).opts(
        title=f'A) Latitude = {point1.latitude.values.round(3)}, Longitude = {point1.longitude.values.round(3)}'
    )
    + point2.hvplot.line(
        y='reflectance', x='wavelengths', color='black', frame_height=400, frame_width=440
    ).opts(
        title=f'B) Latitude = {point2.latitude.values.round(3)}, Longitude = {point2.longitude.values.round(3)}'
    )
    + point3.hvplot.line(
        y='reflectance', x='wavelengths', color='black', frame_height=400, frame_width=440
    ).opts(
        title=f'C) Latitude = {point3.latitude.values.round(3)}, Longitude = {point3.longitude.values.round(3)}'
    )
)
spectra_plots

Based on what you see above, what can you take away from these different spectral plots about the status, vigor, and cover of different crops from these three different agricultural fields?

### Step 6. Spectral vegetation indices

Before we proceed with our analysis of the spectral differences and characteristics of different crop types in the Yuma, CO region let's first discuss what we can learn about plant health and status using high spectral resolution data like EMIT. We will review what information is contained in the spectral domain as well as how we can use spectral vegetation indices to tease out subtle differences in plant pigments, water content and physiology using just the raw spectral data provided by EMIT.

For more examples of how to calculate spectral vegetation indices with EMIT, you can review the "How To" notebook **"Calculate_spectral_vegetation_indices.ipynb"** located in the HYR-SENSE repo [here](https://github.com/CU-ESIIL/HYR-SENSE/blob/main/notebooks/how_to/Calculate_spectral_vegetation_indices.ipynb).  In this notebook we will proceed with our analysis using the functions provided in the functions file [spectral_index.py](https://github.com/CU-ESIIL/HYR-SENSE/blob/main/tools/functions/spectral_index.py) and use these SVIs later to compare and contrast the water, pigment, and physiological differences of our select crop types.

First, let's take a moment to consider again what remote sensing imagery like those provided by EMIT can tell us about the underlying vegetation. The internal structure and biochemistry of leaves (A) within a canopy control the optical signatures observed by remote sensing instrumentation (B). The amount of incident radiation that is reflected by, transmitted through, or absorbed by leaves within a canopy is regulated by these structural and biochemical properties of leaves. For example, leaf properties such as a thick cuticle layer, high wax, and/or a large amount of leaf hairs can significantly influence the amount of first-surface reflectance (that is the reflected light directly off the outer leaf layer that does not interact with the leaf interior), causing less solar radiation to penetrate into the leaf. The thickness of the mesophyll layer associated with other properties, such as thicker leaves, can cause a higher degree of internal leaf scattering, less transmittance through the leaf, and higher absorption in some wavelengths. Importantly, the diffuse reflectance out of the leaf is modified by internal leaf properties and contains information useful for mapping functional traits (B). High spectral resolution measurements of leaves and plant canopies enable the indirect, non-contact measurement of key structural and chemical absorption features that are associated with the physiological and biochemical properties of plants (B).

**A)** ![leaf_anatomy.jpg](../../images/leaf_anatomy_figure.jpg) **B)** ![spectral_signatures.jpg](../../images/spectral_signatures.jpg)

We can make use of all of the information contained within the emergent spectral signatures provided by vegetation.  We can do this by directly utilizing the spectral profiles of a leaf or an EMIT pixel, or we can instead target specific wavebands provided by data like EMIT to calculate a spectral vegetation index (SVI). SVIs range widely in their wavelengths/bands, structure, and applications. This article provides some background information ([https://www.nature.com/articles/s41597-023-02096-0](https://www.nature.com/articles/s41597-023-02096-0)), and you can find a comprehensive list of SVIs [here](https://www.indexdatabase.de/).


For more information, you can review these select articles and resources that discuss how leaf and canopy structure, leaf chemical properties, and stress can alter the spectral signatures we see with remote sensing data like those provided by EMIT and how SVIs provide us a way to easily probe the properties of vegetation remotely.

[Sources of variability in canopy reflectance and the convergent properties of plants](https://doi.org/10.1111/j.1469-8137.2010.03536.x)

[Retrieval of foliar information about plant pigment systems from high resolution spectroscopy](https://doi.org/10.1016/j.rse.2008.10.019)

[Scaling Functional Traits from Leaves to Canopies](https://link.springer.com/chapter/10.1007/978-3-030-33157-3_3)

#### Calculate NDVI

We can start by calculating a very simple, yet powerful, and widely-used SVI, the normalized difference vegetation index (NDVI). NDVI has been used for over 40 years to study changes on the Earth's surface, specifically related to vegetation, stress, and agriculture. For more information on NDVI, you can explore this article from NASA: [https://earthobservatory.nasa.gov/features/MeasuringVegetation/measuring_vegetation_1.php](https://earthobservatory.nasa.gov/features/MeasuringVegetation/measuring_vegetation_1.php)

To calculate NDVI, we need to select which bands we want to include in the calculation. In general, NDVI is defined using a red and near-infrared band, so lets use a band centered at 650nm and another at 850nm, both squarely within the red and NIR wavelength range as shown in the example spectral graph above

The basic structure of the NDVI is: NDVI = (NIR−Red)/(NIR + Red)

In [None]:
### Let's calculate the NDVI using the provided normalized_diff function
# Red band ~650 nm, NIR band ~850 nm
ndvi = normalized_diff(input_xarray=ds_geo, band1=650, band2=850)

# Map the index over ESRI tiles with the colour scale fixed to 0-1
ndvi.hvplot.image(
    cmap='viridis',
    geo=True,
    tiles='ESRI',
    aspect='equal',
    frame_width=720,
    clim=(0, 1),
).opts(title="NDVI Image")

Above is the NDVI map generated from the Yuma CO scene. What new information or details do you notice in this NDVI map?

In [None]:
### Now let's calculate 2 more SVIs and show the three SVIs side by side for comparison

# The Normalized Difference Water Index (NDWI) is designed to capture subtle
# variations in canopy water content
ndwi = normalized_diff(input_xarray=ds_geo, band1=2200, band2=864)

# The Red Edge NDVI (reNDVI) is highly sensitive to subtle variations in plant
# photosynthetic pigment content
reNDVI = normalized_diff(input_xarray=ds_geo, band1=705, band2=750)

# Show the three plots side by side; the reNDVI panel uses its own narrow colour range
(
    ndvi.hvplot.image(
        cmap='viridis', geo=True, tiles='ESRI', aspect='equal', frame_width=500, clim=(0, 1)
    ).opts(title="NDVI Image")
    + ndwi.hvplot.image(
        cmap='viridis', geo=True, tiles='ESRI', aspect='equal', frame_width=500, clim=(0, 1)
    ).opts(title="NDWI Image")
    + reNDVI.hvplot.image(
        cmap='viridis', geo=True, tiles='ESRI', aspect='equal', frame_width=500, clim=(0.44, 0.48)
    ).opts(title="reNDVI Image")
)

Based on what you see above, what are the similarities and differences across the three normalized-difference SVIs: NDVI, NDWI, and the reNDVI?  Are there particular areas that show strong similarities? Are there observable differences?

We will further explore the similarities and differences in crop condition and physiology using raw EMIT spectra and SVIs below.

### Step 7. Spectra of major crop types


Now let's look at EMIT spectral plots across major crop types represented in the selected EMIT scene.  To do this we are going to need the Cropland Data Layer (CDL) which provides a raster image of different crop types, we will clip to our EMIT granule region, identify the top 10 most common crop classes, and then randomly sample points from the three most common crop classes to compare spectral plots

In [None]:
### Load the Cropland Data Layer (CDL) for our region and crop it to the
### Yuma, CO granule footprint
cdl_path = os.path.join(datadir,'CDL_2023_CO_SouthPlatte_Republican.tif')
# NOTE(review): `mask=True` is not one of rioxarray's own keywords (its nodata
# masking flag is `masked`), so it is forwarded as an extra open option --
# confirm which behaviour was intended.
cdl = rxr.open_rasterio(cdl_path, mask=True, cache=False).squeeze()
# Pass the footprint CRS explicitly so the clip stays correct even if the CDL
# raster is not stored in the same CRS as gdf_yuma
cdl = cdl.rio.clip(gdf_yuma.geometry, crs=gdf_yuma.crs)
print(cdl)
unique_values = np.unique(cdl.values)
print(unique_values)

# Get the most common pixel values by counting the frequency.
# Only finite, positive codes are counted, and they are cast to integers
# because np.bincount rejects floating-point (e.g. NaN-masked) input.
cdl_values = cdl.values.ravel()
is_valid = np.isfinite(cdl_values) & (cdl_values > 0)
cdl_values = cdl_values[is_valid].astype(np.int64)
counts = np.bincount(cdl_values)

# Get the indices of the (up to) ten most common pixel values, most common
# first. Codes that never occur are excluded, so a footprint with fewer than
# ten classes does not report zero-count "classes".
n_top = min(10, int(np.count_nonzero(counts)))
top_ten_indices = np.argsort(counts)[::-1][:n_top]
top_ten_counts = counts[top_ten_indices]
print("Ten most common pixel values and their counts:", list(zip(top_ten_indices, top_ten_counts)))

# Convert the counts to a DataFrame
counts_df = pd.DataFrame({'Codes': top_ten_indices, 'Counts': top_ten_counts})

# Write the df to a csv to save results
counts_df.to_csv(os.path.join(datadir,'CDL_codes_and_counts.csv'), index=False)

print("Counts DataFrame:")
counts_df

Above you can see the counts by CDL crop class for the 10 most common classes sorted by total counts

In [None]:
# Read the lookup table that is joined to the counts on its 'Codes' column
lookup_path = os.path.join(datadir, 'CDL_codes.csv')
lookup = pd.read_csv(lookup_path)

# Left join: keep every row of the counts table and attach the matching lookup columns
result_df = pd.merge(counts_df, lookup, on='Codes', how='left')
print("Top ten most common crop types and their pixel values:")
print(result_df)

Now lets select three crop classes to analyze below.  Let's look at **Winter Wheat**, **Corn**, and **Millet** classes

In [None]:
from shapely.geometry import Point

# CDL class codes to sample
# (presumably Winter Wheat, Corn, Millet -- verify against the lookup table)
major_crops = [24,1,29]
# Number of random points to draw from each class
n_per_class = 10

# Read the raster values and coordinate vectors once instead of per point
cdl_codes = cdl.values
x_coords = cdl.x.values
y_coords = cdl.y.values

# Generate up to n_per_class random points within each of the three classes
points = []
for code in major_crops:
    # Row/column positions of the pixels that belong to the current class
    rows, cols = np.where(cdl_codes == code)
    if rows.size == 0:
        # Nothing to sample: np.random.choice would raise on an empty class
        print(f"*** No CDL pixels found for class {code}; skipping")
        continue
    # Sample without replacement so the same pixel is never picked twice;
    # classes with fewer pixels than requested contribute all of their pixels
    n_draw = min(n_per_class, rows.size)
    picks = np.random.choice(rows.size, size=n_draw, replace=False)
    points.extend(
        (Point(x_coords[col], y_coords[row]), code)
        for row, col in zip(rows[picks], cols[picks])
    )

# Create a GeoDataFrame with the sample points, in the CRS of the CDL raster
gdf_samples = gpd.GeoDataFrame(points, columns=['geometry', 'Codes'], crs=cdl.rio.crs)

# Join with the lookup table to get the crop types
gdf_samples = gdf_samples.merge(lookup, on='Codes', how='left')

# Reproject to EPSG:4326 and store the coordinates as plain columns
gdf_samples = gdf_samples.to_crs(4326)
gdf_samples['Latitude'] = gdf_samples.geometry.y
gdf_samples['Longitude'] = gdf_samples.geometry.x
print(gdf_samples)

#### Plot average spectra for the three major crop types

Now that we have a random sample from the major crop types in our granule, we can plot the spectra at these points to examine any difference between the crop types.

NOTE: need to figure out why the points fall outside the EMIT granule?

In [None]:
# Plain attribute table of the samples (geometry column dropped) for plotting
df_samples = pd.DataFrame(gdf_samples.drop(columns='geometry'))

# Both layers are declared with the same CRS (EPSG:4326)
geo_opts = dict(geo=True, crs='EPSG:4326')

# Greyscale backdrop: the EMIT band nearest to 850 nm
emit_plot = ds_geo.sel(wavelengths=850, method='nearest').hvplot.image(
    cmap='greys', frame_height=600, frame_width=600, **geo_opts
).opts(title="Major Crop Type Samples")

# Sample points, coloured by CDL class code
points_plot = df_samples.hvplot.points(
    x='Longitude', y='Latitude', by='Codes', color=hv.Cycle('Dark2'), **geo_opts
)

# Overlay the points on the image
combined_plot = emit_plot * points_plot
combined_plot

In [None]:
# Index the sample table by CDL class code and convert it to an xarray
# Dataset so its Latitude/Longitude columns can be used as indexers
samples = df_samples.set_index('Codes')
xp = samples.to_xarray()
xp

In [None]:
# Select the nearest EMIT pixel for each sample's Latitude/Longitude,
# then flatten the selection into a table
nearest_pixels = ds_geo.sel(
    latitude=xp.Latitude,
    longitude=xp.Longitude,
    method='nearest',
)
extracted = nearest_pixels.to_dataframe()
extracted

NOTE: need to figure out the random points being outside the bounds. Also need to probably create an average for each crop type and plot that. Then this plot won't look so terrible ...

In [None]:
# Reflectance vs. wavelength for the extracted sample pixels, grouped by CDL class code
label_opts = dict(title='Example Points - Reflectance', xlabel='Wavelengths (nm)', ylabel='Reflectance')
extracted.hvplot(
    x='wavelengths',
    y='reflectance',
    by=['Codes'],
    color=hv.Cycle('Dark2'),
    frame_height=400,
    frame_width=600,
).opts(**label_opts)

### Step 8. Experiment with band ratios or what are called Spectral Vegetation Indices (SVIs)

To explore the utility of high spectral resolution data for calculating SVIs for Earth Science, we can demonstrate how to calculate a commonly-used index: The Normalized Difference Vegetation Index (NDVI).  NDVI has been used for over 40 years to study changes on the Earth's surface, specifically related to vegetation, stress, and agriculture. For more information on NDVI, you can explore this article from NASA: [https://earthobservatory.nasa.gov/features/MeasuringVegetation/measuring_vegetation_1.php](https://earthobservatory.nasa.gov/features/MeasuringVegetation/measuring_vegetation_1.php)

In addition, here is a recent article describing the use of SVIs in Earth Science <br>
[https://www.nature.com/articles/s41597-023-02096-0](https://www.nature.com/articles/s41597-023-02096-0)

#### Look at spectra across major crop types

DRAFT.

Load data from the Cropland Data Layer (CDL) which provides a raster image of different crop types.

Clip to our EMIT granule region.

Identify the top 10 most common classes.

Create a random point sample of the top three most common crops.

Calculate the Normalized Difference Vegetation Index (NDVI)

In [None]:
# NDVI uses a combination of reflectance in the NIR and Red wavelengths.  Here we use the bands nearest to 850 and 650 nm
# First, let's calculate an NDVI image and take a look at the results
# NDVI = (NIR - Red) / (NIR + Red)
refl650 = ds_geo.sel(wavelengths=650, method='nearest')
refl850 = ds_geo.sel(wavelengths=850, method='nearest')
ndvi = (refl850-refl650)/(refl850+refl650)

In [None]:
# Map the NDVI image over ESRI tiles with the colour scale fixed to 0-1
ndvi.hvplot.image(cmap='viridis', geo=True, tiles='ESRI', aspect = 'equal', frame_width=720, clim=(0,1)).opts(title="NDVI Image")

In [None]:
### STILL A WIP!!!

### Boxplots of SVIs across crop types

### Boxplots of SVIs for irrigated vs. non-irrigated

In [None]:
# Load the irrigated lands raster
# NOTE(review): unlike the CDL raster, this one is not .squeeze()d after
# opening, so it may keep an extra band dimension -- confirm that is intended
irrigated = rxr.open_rasterio(os.path.join(datadir,'LANID_Irrigation_CO_SouthPlatte_Republican.tif'))
print(irrigated)

In [None]:
# Create a data stack of CDL, irrigation, and EMIT

# Assign a CRS to the EMIT dataset only if it does not already carry one
if not ds_geo.rio.crs:
    ds_geo = ds_geo.rio.write_crs("EPSG:4326")  # Set to the appropriate CRS if known

# Reproject and match our CDL and irrigation layers
# NOTE(review): the CDL is categorical, so confirm the resampling method used
# by reproject_match is suitable for class codes
cdl_repr = cdl.rio.reproject_match(ds_geo)
irrigated_repr = irrigated.rio.reproject_match(ds_geo)

# Use the 'emit_data' variable when present; otherwise fall back to combining
# all data variables of ds_geo into a single array
ds_geo_da = ds_geo['emit_data'] if 'emit_data' in ds_geo.data_vars else ds_geo.to_array().squeeze()

# Stack the data layers
# NOTE(review): ds_geo is indexed by latitude/longitude elsewhere in this
# notebook, while the reprojected rasters may use x/y dimension names --
# confirm the three layers end up sharing the same spatial dimensions
ds_stack = xr.Dataset({
    'emit': ds_geo_da,
    'cdl': cdl_repr,
    'irrigation': irrigated_repr
})

print(ds_stack)

In [None]:
# Pull the wavelengths coordinate off the stacked EMIT layer and display it
wavelengths = ds_stack['emit'].coords['wavelengths']
wavelengths