# Geospatial Python
## Temporal assessment using command line arguments 
Setup: https://carpentries-incubator.github.io/geospatial-python/index.html

Based on instruction: https://carpentries-incubator.github.io/geospatial-python/09-raster-calculations.html

Boundary data source - https://burnseverity.cr.usgs.gov/ravg/data-access, searched for "cameron peak"

This notebook does the following:
- Loads a boundary file of a fire boundary
- Searches for satellite data within the area and during the growing season for a specific year
- Takes several of the retrieved satellite images and merges them together to get complete coverage
- Translates the satellite data into a value representing vegetative growth (Normalized Difference Vegetation Index, NDVI)
- Classifies the vegetative growth, generates statistics, and saves the output as both text and GeoTIFF

We'll want to compare different years to see pre, post, and potentially other years of vegetative growth within a burn area.
To do this, we'll leverage a library called [papermill](https://github.com/nteract/papermill), which allows us to pass parameters to the notebook so that it dynamically loads, analyzes, and saves our results for a specific year of satellite data.

When we deploy this script to an HPC environment, running multiple instances of it will allow us to get our results much more rapidly.

Here's one call of this script for the year 2021:

```
papermill Boundary\ Raster\ Classification.ipynb output_2021.ipynb -p year 2021
```

Before executing the code cells, be sure to **fill in the blanks** by replacing the "_____" as appropriate

In [None]:
import pystac
import geopandas as gpd
import folium # to make interactive maps
import rioxarray
from rioxarray import merge
from pystac_client import Client # to query STAC API endpoint

import geojson # to parse spatial data format
import folium # to create an interactive map
from folium.plugins import Draw # to allow drawing

# Create a variable to determine if the notebook is being run locally
local_run=False
if local_run:
    # this package is problematic on remote computers
    from localtileserver import TileClient, get_folium_tile_layer # to visualize the geotiff

import numpy as np # to work with numbered lists
import xarray # to preserve spatial metadata when working with numbered lists

import earthpy.plot as ep # for drawing a legend
import matplotlib.pyplot as plt # for plotting
from matplotlib.colors import ListedColormap # to color our classified data


In [None]:
# Start by setting a variable for the year
# THIS CODE CELL has the tag 'parameters' (see https://github.com/nteract/papermill)
# This tag will allow us to overwrite the contents of the cell using the command line, which also creates a new notebook
# We'll set a 'year' variable and look at the satellite image just before the fire broke out
# Note: The Cameron Peak fire occurred August 13, 2020
# Replace the blank with a default year; `papermill ... -p year <value>` overrides it at run time
year = "_____"

In [None]:
# Load in our boundary data (a zipped burn-boundary dataset) with geopandas
boundary = gpd.read_file("data/co4060910587920200813_20180915_20210907_burn_bndy.zip")
# A bare name on the last line of a cell makes the notebook display it
boundary

In [None]:
# Show the data types
# (fill in the blank with the attribute of `boundary` that reports them)
print(boundary."_____")

In [None]:
# Build an interactive map of 'boundary'
# Centre it on the lat/long columns of the row at position 1
centre_row = boundary.iloc[1]
m = folium.Map(
    [centre_row['BurnBndLat'], centre_row['BurnBndLon']],
    zoom_start=10,
)

# Timestamps cannot be serialised for the interactive map, so store the date as text
# (otherwise error "Object of type Timestamp is not JSON serializable")
boundary['Ig_Date'] = boundary['Ig_Date'].astype(str)

# Draw the boundary as a layer on the map
boundary_layer = folium.GeoJson(boundary)
boundary_layer.add_to(m)

# Show the map
"_____"

In [None]:
# Display the boundary variable
# (fill in the blank with the attribute or method of `boundary` to use)
boundary."_____"

In [None]:
# Show the boundary CRS (coordinate reference system)
# (fill in the blank with the attribute of `boundary` that holds it)
boundary."_____"

In [None]:
# Convert the boundary to use CRS '4326'
# (fill in the blank with the target CRS; the result replaces `boundary`)
boundary=boundary.to_crs("_____")

In [None]:
# Join the rows to get a complete bounds
# union_all() merges the geometry of every row into a single shape
poly=boundary.geometry.union_all()
# Display the bounding box of the merged shape as (minx, miny, maxx, maxy)
poly.bounds

In [None]:
# Metadata search for Sentinel-2, Level 2A scenes, to retrieve Cloud Optimized GeoTiffs (COGs)
api_url = "https://earth-search.aws.element84.com/v1"

# Connect to the STAC API
client = Client.open(api_url)

# Collection to search
# Reference: https://radiantearth.github.io/stac-browser/#/external/earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a
# NOTE(review): that URL names 'sentinel-2-c1-l2a' while the ID used below is 'sentinel-2-l2a' -- confirm which is intended
collection = "sentinel-2-l2a"

# The month of July of the requested year, written as a "start/end" interval
date_range = f"{year}-07-01/{year}-07-31"

search = client.search(
    collections=[collection],
    bbox=poly.bounds,  # bounding box format: https://datatracker.ietf.org/doc/html/rfc7946#section-5
    datetime=date_range,
    query=["eo:cloud_cover<20"],  # keep only scenes whose cloud cover is below 20
    limit=10,
)

# Show the number of scenes (i.e. the portion of the footage recorded by the satellite)
scene_count = search.matched()
print(scene_count)

In [None]:
# Sort the items by cloud_cover
# First collect the search results into an item collection
items = search.item_collection()

# Fill in the blank so the sort key reads each item's cloud-cover property
items_sorted = sorted(items, key=lambda x: x.properties["eo:"_____""]) 

In [None]:
# Let's shortlist the first three sorted items; they are merged and then clipped later
# For each one, keep the href of its "red" and "nir08" assets
red_item_hrefs = []
nir_item_hrefs = []

for item in items_sorted[:3]:
    print(item)
    assets = item.assets
    red_item_hrefs.append(assets["red"].href)
    nir_item_hrefs.append(assets["nir08"].href)

In [None]:
# View the map file 'boundary' together with the shortlisted red-band rasters
# Use the file's lat and long column to center the map
m = folium.Map([boundary.iloc[1]['BurnBndLat'],boundary.iloc[1]['BurnBndLon']], zoom_start = 10)

# Add the boundary
folium.GeoJson(boundary).add_to(m)


# View the red band 'items' on the map
if local_run:
    # Show each raster itself as a tile layer (needs the locally imported localtileserver)
    for layer_number, href in enumerate(red_item_hrefs):
        tiles = TileClient(href) # create tiles client
        tile_layer = get_folium_tile_layer(tiles, name='red_'+str(layer_number)) # create tile layer
        tile_layer.add_to(m)
else:
    # We'll just show the raster boundaries
    # Some extra modules and a library are required for this
    from shapely.geometry import box # To create a box
    from shapely.ops import transform # The shapely transform module
    import pyproj # A reprojection library

    for href in red_item_hrefs:
        raster = rioxarray.open_rasterio(href)

        # Build the transformer from this raster's own CRS to EPSG:4326, so scenes
        # stored in different projections are each placed correctly.
        # always_xy=True keeps the coordinate order as (x, y) / (lon, lat).
        project = pyproj.Transformer.from_crs(raster.rio.crs.to_epsg(), 4326, always_xy=True).transform

        # Create the boundary box of the raster and reproject it for the map
        bbox = box(*raster.rio.bounds())
        bbox_transformed = transform(project, bbox)
        folium.GeoJson(
            bbox_transformed,
            style_function=lambda feature: {
                "color": "purple",
            },
        ).add_to(m)

# show the bounds of the file 'boundary'
# poly.bounds is (minx, miny, maxx, maxy); the rectangle corners are given as [y, x] pairs
folium.Rectangle(
    bounds=[[poly.bounds[1], poly.bounds[0]], [poly.bounds[3], poly.bounds[2]]],
).add_to(m)

# Add the drawing tools (with export enabled) and a layer switcher
draw = Draw(export=True)
draw.add_to(m)

folium.LayerControl().add_to(m)

# Show the map
"_____"

In [None]:
# Open every shortlisted raster using the argument masked=True,
# keeping the red and the nir rasters in two separate lists.

red_rasters = [rioxarray.open_rasterio(href, masked=True) for href in red_item_hrefs]

nir_rasters = [rioxarray.open_rasterio(href, masked=True) for href in nir_item_hrefs]

In [None]:
# Set our boundary to the CRS of the raster
# (the CRS is read from the first red raster)
boundary_new_crs=boundary.to_crs(red_rasters[0].rio.crs)
# Merge the reprojected rows into a single shape
poly_new_crs=boundary_new_crs.geometry.union_all()

# Show the bounds
# (fill in the blank with the attribute of `poly_new_crs` that holds them)
poly_new_crs."_____"

In [None]:
# Merge our files into one raster per band, limited to the bounds of the
# reprojected boundary.
# `bounds` is passed by keyword: rioxarray releases where the options of
# merge_arrays are keyword-only reject a positional value, while the keyword
# form works on every version.
merge_bounds = poly_new_crs.bounds

red_merged = merge.merge_arrays(red_rasters, bounds=merge_bounds)

nir_merged = merge.merge_arrays(nir_rasters, bounds=merge_bounds)

In [None]:
# Write both merged rasters to disk; the 'year' variable is part of each file name

red_path = f"red_merged{year}.tif"
nir_path = f"nir_merged{year}.tif"

red_merged.rio.to_raster(red_path)
nir_merged.rio.to_raster(nir_path)

## Raster Math

## Crop raster data with polygons

In [None]:
# Check the shapes of the two rasters
# (they must match before the two bands can be combined cell by cell)
print(red_merged.shape, nir_merged.shape)

In [None]:
# As their width and height do not match,
# use reproject_match to reproject the red raster onto the grid of the nir raster,
# so both end up with the same shape.
red_merged_matched = red_merged.rio.reproject_match(nir_merged,nodata=np.nan ) # Set NaN as NoData
# Confirm the new shape
print(red_merged_matched.shape)

In [None]:
# Compute the NDVI as a new raster
# NDVI = (NIR - Red) / (NIR + Red), evaluated cell by cell
ndvi = (nir_merged - red_merged_matched)/ (nir_merged + red_merged_matched)
print(ndvi)

In [None]:
# Plot the computed NDVI
# (fill in the blank with the method of `ndvi` to call)
ndvi."_____"()

In [None]:
# Plot a histogram to see the spread of values across 50 bins
# (fill in the blank with the number of bins)
ndvi.plot.hist(bins="_____")

In [None]:
# Discretize the color plot by specifying the intervals
# (the values below are the edges of the intervals)
class_bins = (-1, 0., 0.2, 0.7, 1)
# Passing the edges as `levels` makes the plot use discrete colors instead of a continuous scale
ndvi.plot(levels=class_bins)

In [None]:
# Fill missing values by interpolating from neighbouring grid cells
# along the x dimension, using the .interpolate_na method.
ndvi_nonan = ndvi.interpolate_na(dim="x")

# Write the result to disk; the 'year' variable is part of the file name
ndvi_path = f"NDVI{year}.tif"
ndvi_nonan.rio.to_raster(ndvi_path)

## Classifying Continuous Rasters in Python

Reduce the complexity of the map by classifying it. 

Classification involves assigning each pixel in the raster to a class based on its value. 

In Python, we can accomplish this using the *numpy.digitize* function

Note: by default, each class includes the left but not the right bound. This is not an issue here, since the computed range of NDVI values is fully contained in the open interval (-1; 1) (see exercise above).

In [None]:
import numpy as np
import xarray

# Defines the bins for pixel values
# (the same interval edges as used for the discretized plot)
class_bins = (-1, 0., 0.2, 0.7, 1)

# The numpy.digitize function returns an unlabeled array, in this case, a
# classified array without any metadata. That doesn't work--we need the
# coordinates and other spatial metadata. We can get around this by using
# "xarray.apply_ufunc", which can run the function across the data array while
# preserving metadata.
#
# With these increasing bins, a value v is assigned the class i for which
# class_bins[i-1] <= v < class_bins[i], i.e. classes 1 to 4 for values in [-1, 1).
# NOTE(review): cells that are still NaN after interpolation presumably fall
# outside classes 1-4 -- confirm how they should be treated downstream.
ndvi_classified = xarray.apply_ufunc(
    np.digitize,
    ndvi_nonan,
    class_bins,
    dataset_fill_value=np.nan
)

In [None]:
# Plot the classified NDVI with a proper title and legend
import earthpy.plot as ep
import matplotlib.pyplot as plt

from matplotlib.colors import ListedColormap

# One (class name, color) pair per legend entry, in legend order
legend_entries = [
    ("Water", "blue"),
    ("No Vegetation", "gray"),
    ("Sparse Vegetation", "green"),
    ("Dense Vegetation", "darkgreen"),
]

# Class names for the legend
category_names = [name for name, _ in legend_entries]

# Color map of the map legend
ndvi_colors = [color for _, color in legend_entries]
ndvi_cmap = ListedColormap(ndvi_colors)

# The order in which the legend items should be arranged
category_indices = list(range(len(category_names)))

# Draw the image without the default colorbar, then add the title
im = ndvi_classified.plot(cmap=ndvi_cmap, add_colorbar=False)
plt.title("Classified NDVI")
# earthpy draws the legend from the existing image plot, the legend items and their indices
ep.draw_legend(im_ax=im, classes=category_indices, titles=category_names)

# Save the figure (optional)
# plt.savefig("NDVI_classified.png", bbox_inches="tight", dpi=300)

In [None]:
# Write the classified NDVI raster object to a GeoTiff, stored as 32-bit integers;
# the 'year' variable is part of the file name
classified_path = f"NDVI{year}_classified.tif"
ndvi_classified.rio.to_raster(classified_path, dtype="int32")

In [None]:
# Plot a histogram of the classified data
# (fill in the blank with the plotting method to call)
ndvi_classified.plot."_____"()

In [None]:
# Calculate zonal statistics of the NDVI values for each class.
# The zones come from the in-memory classified raster and the values from the
# in-memory NDVI raster (nothing is re-loaded from the exported GeoTiffs here).
# Inspect the output of the zonal_stats function.

from xrspatial import zonal_stats
# squeeze() drops the length-1 dimensions before the arrays are passed in
stats=zonal_stats(ndvi_classified.squeeze(), ndvi.squeeze())
stats

In [None]:
# Record the counts for this year in a csv file

file_path = "output.csv"

# Mode 'x' only succeeds when the file does not exist yet,
# so the header row is written exactly once
try:
    with open(file_path, 'x') as new_file:
        names = ",".join(category_names)
        new_file.write(f"year,{names}\n")
except FileExistsError:
    # The file (and its header) is already there
    pass

# Add one row for this year to the existing file
with open(file_path, 'a') as out_file:
    counts = ",".join(str(x) for x in stats["count"])
    out_file.write(f"{year},{counts}\n")