# Notebook setup
**1.** Import the Earth Engine, geemap, and 3rd party libraries.

In [None]:

%pip install -q --upgrade altair
%pip install h3==3.7.6 tobler
import altair as alt

import ee
import geemap as geemap

import numpy as np
import pandas as pd
import geopandas as gpd

import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid, make_axes_locatable

from shapely.wkt import loads
from tobler.util import h3fy


**2.** Authenticate and initialize the Earth Engine service.
*  This requires a Google Cloud project that you have access to and that is registered to use Earth Engine.



In [None]:
# Google Cloud project used for Earth Engine; change it to a project where
# you have Earth Engine access.
project_id = 'sdsc24-nyc'

# Authenticate, then initialize the Earth Engine client against that project.
ee.Authenticate()
ee.Initialize(project=project_id)

# Section 1 - The Basics


## Add raster data to a map
**1.** Load climate data for a given period and display its metadata.

In [None]:
# Monthly ERA5-Land aggregates, narrowed to the September 2024 window;
# keep the first image of the filtered collection.
era5_land_monthly = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')
sept_2024_climate = era5_land_monthly.filterDate('2024-09', '2024-10').first()
sept_2024_climate

**2.** Instantiate a map object and add the temperature band as a layer with
specific visualization properties. Display the map.

In [None]:
# Visualization settings for the 2 m temperature band.
vis_params = dict(
    bands=['temperature_2m'],
    min=229,
    max=304,
    palette='inferno',
)

# World-scale map with the temperature layer on top.
m = geemap.Map(center=[30, 0], zoom=2)
m.add_layer(sept_2024_climate, vis_params, 'Temperature (K)')
m

## Add vector data to a map

**1.** Create a vector data object with points for three cities.

In [None]:
# (x, y, name) for each city; x/y are passed to ee.Geometry.Point in that order.
city_points = [
    (10.75, 59.91, 'Oslo'),
    (-118.24, 34.05, 'Los Angeles'),
    (103.83, 1.33, 'Singapore'),
]

# One point feature per city, carrying the city name as a property.
cities = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point(x, y), {'city': city_name})
    for x, y, city_name in city_points
])
cities

**2.** Add the city locations to the map and redisplay it.

In [None]:
# Overlay the city points on the existing map `m`, then display it again.
m.add_layer(cities, name='Cities')
m

**2.** Extract the climate data for the three cities as a pandas DataFrame.

In [None]:
# For every city feature, take the first climate-image value found in it.
city_climates = sept_2024_climate.reduceRegions(
    collection=cities, reducer=ee.Reducer.first()
)

# Ask Earth Engine to return the computed table as a pandas DataFrame.
compute_request = {
    'expression': city_climates,
    'fileFormat': 'PANDAS_DATAFRAME',
}
city_climates_dataframe = ee.data.computeFeatures(compute_request)
city_climates_dataframe

**3.** Plot the temperature for the cities as a bar chart.

In [None]:
# Field shorthands and titles are shared by the axes and the tooltips.
city_field, city_title = 'city:N', 'City'
temp_field, temp_title = 'temperature_2m:Q', 'Temperature (K)'

(
    alt.Chart(city_climates_dataframe)
    .mark_bar(size=100)
    .encode(
        alt.X(city_field, sort='y', axis=alt.Axis(labelAngle=0), title=city_title),
        alt.Y(temp_field, title=temp_title),
        tooltip=[
            alt.Tooltip(city_field, title=city_title),
            alt.Tooltip(temp_field, title=temp_title),
        ],
    )
    .properties(title='September 2024 temperature for selected cities', width=500)
)

## More Advanced Raster Processing


**1.** Create a sophisticated cloud-free Sentinel 2 Mosaic
This will *dynamically* create a cloud-free mosaic for an entire year's worth of 10m Sentinel 2 Imagery and display it on a geemap.

1 year of Sentinel 2 Imagery is ~3.5 petabytes

In [None]:
# Harmonized Sentinel-2 Level 2A collection.
# https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR_HARMONIZED
s2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')

# Cloud Score+ image collection.
# https://developers.google.com/earth-engine/datasets/catalog/GOOGLE_CLOUD_SCORE_PLUS_V1_S2_HARMONIZED
# Note Cloud Score+ is produced from Sentinel-2 Level 1C data
# and can be applied to either L1C or L2A collections.
csPlus = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED')

# Cloud Score+ band used for masking, and the minimum score a pixel must have
# to be kept. Defined before the function that reads them.
QA_BAND = 'cs'
CLEAR_THRESHOLD = 0.60


def maskThreshold(img):
  """Mask out pixels whose QA_BAND value is below CLEAR_THRESHOLD.

  Args:
    img: An ee.Image that contains the QA_BAND band (linked in from Cloud
      Score+ below).

  Returns:
    The same image with its mask updated so that only pixels with
    QA_BAND >= CLEAR_THRESHOLD remain.
  """
  return img.updateMask(img.select(QA_BAND).gte(CLEAR_THRESHOLD))


# Link the Cloud Score+ band onto every 2023 Sentinel-2 image, mask pixels that
# score below the threshold, and take the per-pixel, per-band median.
# filterDate's end date is exclusive, so '2024-01-01' is needed to include
# imagery from 31 December 2023.
composite = (
    s2.filterDate('2023-01-01', '2024-01-01')
    .linkCollection(csPlus, [QA_BAND])
    .map(maskThreshold)
    .median()
)

# Select the R, G, and B bands for visualization and request a map ID, which
# carries the X/Y/Z tile URL pattern of the resulting image.
composite = composite.select(['B4', 'B3', 'B2'])
s2Viz = {'min': 0, 'max': 2500}
mapID = composite.getMapId(s2Viz)


In [None]:
# Note: if you want to use these tiles elsewhere, you can get the tile URL
# pattern from the map ID's tile fetcher.
tile_fetcher = mapID['tile_fetcher']
xyzURL = tile_fetcher.url_format
xyzURL

**2.** Now display the cloud-free mosaic on a geemap


In [None]:
# Map centred on the given coordinates at zoom level 11.
m = geemap.Map(center=[40.791530811684396, -73.95543742118639], zoom=11)

# The composite is built from 2023 imagery, so label the layer accordingly.
m.add_layer(composite, s2Viz, 'S2 CS+ 2023')
m

# Section 2 - Dataframes and Time Series


In the last module we worked through some basic operations and visualized the results as map layers. For understanding temporal dynamics, which is a common desire when working with remote sensing, you will want a line graph that shows quantities of a variable over time, commonly known as a time series graph.

The [VIIRS](https://developers.google.com/earth-engine/datasets/catalog/NOAA_VIIRS_DNB_MONTHLY_V1_VCMSLCFG) dataset has many uses, including observing "Nighttime Lights" which are a proxy for human activity and urban extent.

**Our tasks in this exercise:**
1. Extracting VIIRS time series data and converting to a pandas dataframe
2. Create a 2014-2024 time series graph from VIIRS-DNB data for a point in Seoul, South Korea

First, create a split geemap showing the differences between 2014 and 2024

In [None]:
# Both years come from the same monthly VIIRS collection.
viirs_monthly = ee.ImageCollection("NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG")
viirs2014 = viirs_monthly.filterDate('2014-01-01', '2014-12-31')
viirs2024 = viirs_monthly.filterDate('2024-01-01', '2024-12-31')

# Median `avg_rad` of each year, rendered with the same 0-10 stretch.
median_2014 = viirs2014.select('avg_rad').median()
median_2024 = viirs2024.select('avg_rad').median()
left_layer = geemap.ee_tile_layer(median_2014, {'min': 0, 'max': 10}, "VIIRS 2014")
right_layer = geemap.ee_tile_layer(median_2024, {'min': 0, 'max': 10}, "VIIRS 2024")

# Side-by-side comparison: 2014 on the left, 2024 on the right.
Map = geemap.Map(center=[38.402, 127.096], zoom=7)
Map.split_map(left_layer, right_layer)
Map


In [None]:

# Point Of Interest (POI): a 500 meter buffer around the chosen coordinates.
poi_center = ee.Geometry.Point(127.072483, 37.515817)
poi = poi_center.buffer(500)

# Monthly VIIRS images for the whole study period.
viirs = (
    ee.ImageCollection("NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG")
    .filterDate('2014-01-01', '2024-12-31')
)

**1.** Extracting all image values in the collection

To make a time series, we need to get all the values in our collection using the `map` function. We'll create a custom function in Python that takes a single image as an input and reduces the data in a given region (our point of interest in this case).

We'll get the mean of the pixels in our region and set the scale to 30. We'll use the `avg_rad` band.

We'll then set this reduced value as a property (which we'll call "mean") on the image, together with the image's date. The output of our function is therefore the original image, annotated with the mean radiance of our region and the date.

In [None]:
def poi_mean(img):
    """Annotate an image with its date and its mean `avg_rad` over `poi`.

    Args:
        img: An ee.Image that has an `avg_rad` band.

    Returns:
        The image with two properties set: 'date' (the formatted image date)
        and 'mean' (the mean of `avg_rad` within the module-level `poi`
        geometry, reduced at a scale of 30).
    """
    region_stats = img.reduceRegion(
        reducer=ee.Reducer.mean(), geometry=poi, scale=30
    )
    dated_img = img.set('date', img.date().format())
    return dated_img.set('mean', region_stats.get('avg_rad'))

**2.** We map this function to every image in our collection to get a new ImageCollection, but now each image has the mean value for the region of interest and the date. These are the data we'll make our time series out of.

In [None]:
# Apply poi_mean to every image so each one carries 'date' and 'mean' properties.
poi_reduced_imgs = viirs.map(poi_mean)

**3.** To convert to a pandas dataframe, however, we don't want an ImageCollection, so we will reduce our images to a list of lists:
- for each image, we have a 2-element list that contains that image's date and mean value (for our point of interest)
- each of these lists is itself an element in our outer list, which is what we'll convert to a dataframe

In [None]:
# Collect the ('date', 'mean') property pairs of every image into one list;
# the reducer output is a dictionary, so take its first value.
date_mean_columns = poi_reduced_imgs.reduceColumns(
    ee.Reducer.toList(2), ['date', 'mean']
)
nested_list = date_mean_columns.values().get(0)
nested_list

**4.** This nested list can be turned into a dataframe using the `.DataFrame` constructor. We'll name the columns, "date" and "mean".

In [None]:
# Call getInfo() to retrieve the list contents from Earth Engine, then build
# the dataframe from the resulting [date, mean] rows.
date_mean_rows = nested_list.getInfo()
df = pd.DataFrame(date_mean_rows, columns=['date', 'mean'])

df

**5.** Now we have a dataframe with each observation (month) as a row and it has the timestamp and mean values.

A reason pandas is so powerful is that it has built-ins, like a method to convert our string timestamp into a native timestamp data type.

We will also set our date column to be our index. Doing these things allows our .plot library to automatically convert the dates into nice readable dates on our axis.

In [None]:
# Parse the string timestamps and make them the index. The guard keeps this
# cell idempotent: once 'date' has become the index it is no longer a column,
# and re-running the cell would otherwise raise a KeyError.
if 'date' in df.columns:
    df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

df

**6.** Create a time series graph for VIIRS-DNB from 2014 to 2024 for Seoul Olympic Stadium

In [None]:
# Build a map centred on the POI buffer (zoom 15) with the "SATELLITE" basemap.
koreaMap = geemap.Map()
koreaMap.centerObject(poi, zoom=15)
koreaMap.add_basemap("SATELLITE")
# Draw the buffer itself with default styling, add a layer control, and display.
koreaMap.addLayer(poi, {}, "Seoul Olympic Stadium")
koreaMap.addLayerControl()
koreaMap

**7.** The buffer encompasses almost the entire Olympic complex.

Pandas dataframes integrate nicely with our plotting libraries. We'll use seaborn to make a simple time series line plot.

In [None]:
# Create a figure with pyplot and set the dimensions to 15 x 7
fig, ax = plt.subplots(figsize=(15, 7))

# Create the plot by setting the dataframe to the data argument
sns.lineplot(data=df, ax=ax)

# Set the axis labels
ax.set_ylabel('mean radiance', fontsize=20)
ax.set_xlabel('date', fontsize=20)

# Take the date range in the title from the data itself (the datetime index of
# `df`) so the title cannot drift from the period that was actually fetched.
first_month, last_month = df.index.min(), df.index.max()
ax.set_title(
    'Monthly mean radiance for Seoul Olympic Stadium '
    f'({first_month:%b %Y} to {last_month:%b %Y})',
    fontsize=20,
);

**8.** Brief observations: There are some strong seasonal patterns here (peaks and valleys), which is perhaps not too surprising for a sports stadium. There is also a spike in late 2018 and another in 2022, as well as a drop-off in early 2020 that is very likely an indication of COVID-19 impacts.

What is that spike? With dataframes it is easy to find the date of that peak using the `.idxmax()` method to get the index (which is the month) of the maximum value.

In [None]:
# Index label (the date) at which each column reaches its maximum value.
df.idxmax()

September is a popular month for many concerts and sporting events, and September 2018 was the venue's 30th anniversary.

# Section 3 - Pulling Data Client-side with computePixels (or getPixels) and Data converters



**1.** **Data converters** are client-side conversion capabilities built into getPixels, computePixels, listFeatures, and computeFeatures. By specifying a compatible fileFormat, these methods can return data in Python-native formats like structured NumPy arrays for rasters and Pandas DataFrames or GeoPandas GeoDataFrames for vectors. In the case of vectors, the listFeatures and computeFeatures methods will make several network requests to fetch all the pages of the table before returning the Python object.

All of these methods transfer data from Earth Engine servers to a client machine using the interactive processing environment, which is optimized for answering small requests quickly. As such, it enforces limits on request size and compute time. You'll need to keep this in mind as you're coding your analysis and decide whether exporting data using the batch processing environment would be better. For example, see the `ee.data.computePixels` limits in the reference docs.

Some common use cases for data converters are fetching many small image tiles in parallel (e.g., training ML models or automated serial workflows) and for visualization and data exploration with your favorite Python libraries. This notebook focuses on data exploration and visualization; if you're interested in learning about fetching data in parallel, see the Medium blog post "Pixels to the people!".


**2.** Computed Earth Engine data

Computed Earth Engine data are those that are generated on the fly through instantiation of non-asset data, computation, or manipulation; they are not stored on disk for later retrieval. To request conversion of computed data, you can use the `ee.data.computeFeatures` and `ee.data.computePixels` functions for `ee.FeatureCollection` and `ee.Image` objects, respectively.

To pull data from previously computed or imported Earth Engine assets, you would use `ee.data.getPixels`

**3.** This exercise will utilize computePixels and Data converters to pull precipitation data directly from Earth Engine into a client-side NumPy array.

First create a 12 band image (one band per month) from the [WorldClim Climatology ImageCollection](https://developers.google.com/earth-engine/datasets/catalog/WORLDCLIM_V1_MONTHLY#bands) using aggregate_array which aggregates over a given property of the objects in a collection, calculating a list of all the values of the selected property.

In [None]:
# Administrative level-1 boundaries (FAO GAUL). BOUNDARIES_ID was referenced
# but never defined in this notebook, which raised a NameError on a fresh
# kernel. NOTE(review): this asset ID was chosen to match the ADM0_NAME /
# ADM1_NAME properties used in the filter below; confirm it is the intended one.
BOUNDARIES_ID = 'FAO/GAUL/2015/level1'

# Washington State boundary.
wa = ee.FeatureCollection(BOUNDARIES_ID).filter(
    'ADM0_NAME == "United States of America" && '
    'ADM1_NAME == "Washington"'
)

# WorldClim monthly climatology, precipitation band only.
precip = ee.ImageCollection('WORLDCLIM/V1/MONTHLY').select('prec')

# The 'month' property of every image, fetched to the client as a list.
months = precip.aggregate_array('month').getInfo()

# Zero-padded band names, one per entry in `months`.
band_names = [f'prec_month_{str(month).zfill(2)}' for month in months]

# Stack the collection into a single multi-band image and apply the names.
monthly_precip = ee.Image(precip.toBands().rename(band_names))

monthly_precip



**4.** Now, leveraging computePixels, make several simultaneous calls to Earth Engine to create a client-side NumPy array of the image which is clipped to Washington State at a scale of 1,500 meters (to limit the amount of data returned to the client).


In [None]:
# Restrict the image to the Washington boundary at a scale of 1500.
monthly_precip_washington = monthly_precip.clipToBoundsAndScale(geometry=wa, scale=1500)

# Request the pixels as a NumPy array.
pixel_request = {
    'expression': monthly_precip_washington,
    'fileFormat': 'NUMPY_NDARRAY',
}
monthly_precip_npy = ee.data.computePixels(pixel_request)

monthly_precip_npy

**5.** NumPy structured arrays work well for multiband image data. You can think of them as a table of arrays where each band is a column accessible from a field (band) name. It also permits each band to have a different data type.

For example, get the list of field (band) names and then subset an array by name and print its shape and display a preview of it.

In [None]:
# Field (band) names of the structured array.
names = monthly_precip_npy.dtype.names
print('field names:', names)

# Pull out a single band by its field name and preview it.
selected_field = 'prec_month_10'
prec_month_10_arr = monthly_precip_npy[selected_field]
print('Selected array (band) shape:', prec_month_10_arr.shape)
display(prec_month_10_arr)
plt.imshow(prec_month_10_arr, vmin=0, vmax=320)

**6.** Since the array has *all* months of mean precipitation, we can use the matplotlib [`ImageGrid`](https://matplotlib.org/stable/gallery/axes_grid1/simple_axesgrid.html) class to show a time series grid for simple visual interpretation of intra-annual precipitation patterns.

In [None]:
# Set up the figure and a 4 x 3 grid that shares one colorbar on the right.
fig = plt.figure(figsize=(20.0, 20.0))
grid = ImageGrid(
    fig,
    111,
    nrows_ncols=(4, 3),
    axes_pad=0.4,
    cbar_mode="single",
    cbar_location="right",
    cbar_pad=0.4,
    cbar_size="2%",
)

# Display each band in a grid cell, keeping the image handle that imshow
# returns instead of digging it out of ax.get_children() afterwards.
for ax, name in zip(grid, names):
    image_artist = ax.imshow(monthly_precip_npy[name], vmin=0, vmax=500)
    ax.set_title(name)

# Every panel uses the same vmin/vmax, so any one image can drive the shared
# colorbar.
colorbar = plt.colorbar(image_artist, cax=grid[0].cax)
colorbar.set_label("Precipitation (mm)")

plt.show()

# Section 4 - Server-side Processing for Zonal Statistics

**1.** By now we've done a combination of both client-side downloading and server-side processing of raster data. Downloading raster data to the client can be useful for small jobs and sometimes is the right approach. However, often it is beneficial to leverage the power of Earth Engine to compute Zonal Statistics (reduceRegion, reduceRegions) server-side, and pull only the resulting statistics to the client.

This example will demonstrate the creation of a continental-scale regular grid geometry in the client, then recreating that grid as a server-side geometry to use as the regions for calculating zonal statistics: the mean air temperature for each cell in the grid over a 9-year period (2015–2023).

Start by pulling the boundary of the United States from a BigQuery Public Dataset.

In [None]:
# Fetch the national outline table from the BigQuery public dataset.
query = """SELECT * FROM `bigquery-public-data.geo_us_boundaries.national_outline`"""
outline_table = pd.read_gbq(query, project_id=project_id)

# Parse the first column's WKT strings into geometry objects, then wrap them
# in a GeoDataFrame whose geometry column is that same column.
outline_geoms = outline_table.iloc[:, 0].apply(loads)
conus = gpd.GeoDataFrame(outline_geoms, geometry=outline_geoms.name, crs='epsg:4326')
conus

**2.** Create a GeoDataFrame of a regular hexagonal H3 Grid for conus

In [None]:
# Cover the slightly buffered outline (0.01 in CRS units) with H3 hexagons at
# resolution 4, then move the index into a regular column.
# NOTE(review): later cells select a 'hex_id' column, so the index is presumably
# named 'hex_id' — confirm against tobler's h3fy output.
hexgrid_us = h3fy(conus.buffer(0.01), resolution=4).reset_index()

**3.** Trim to just conus, select rows with latitudes (20 to 60) and longitudes (-130 to -65) and plot the GeoDataFrame.

In [None]:
# Centroid coordinates of every hexagon (x = longitude, y = latitude).
centroid_lon = hexgrid_us['geometry'].apply(lambda hexagon: hexagon.centroid.coords.xy[0][0])
centroid_lat = hexgrid_us['geometry'].apply(lambda hexagon: hexagon.centroid.coords.xy[1][0])

# Keep only hexagons whose centroid falls inside the lon/lat window, and only
# the id and geometry columns.
inside_window = centroid_lon.between(-130, -65) & centroid_lat.between(20, 60)
hexgrid_us = hexgrid_us.loc[inside_window, ['hex_id', 'geometry']]

# Plot the trimmed grid without axes.
fig, ax = plt.subplots(figsize=(30, 26))
hexgrid_us.plot(ax=ax, color='orange', edgecolor='grey', alpha=0.6)
ax.set_axis_off()

**4.** The 'hexgrid_us' GeoDataFrame is more than 4,200 individual polygons. We COULD pull back a lot of data to the client and intersect and calculate a statistic on the intersection of each of these polygons in the client, but instead, we will push them to Earth Engine as a FeatureCollection and use reduceRegions to return just a table of the statistic we want.

In [None]:
def get_ee_feature(geom):
    """Create an Earth Engine Feature from a Shapely polygon.

    Args:
        geom: A Shapely geometry with an `exterior` ring. Only the exterior
            ring is used.

    Returns:
        An ee.Feature wrapping an ee.Geometry.Polygon built from the
        exterior ring's x/y coordinates.
    """
    xs, ys = geom.exterior.coords.xy
    # Single ring expressed as [[[x, y], [x, y], ...]].
    ring = np.dstack((xs, ys)).tolist()
    return ee.Feature(ee.Geometry.Polygon(ring))

# Push every hexagon to Earth Engine as a feature.
features = ee.FeatureCollection(
    [get_ee_feature(row.geometry) for _, row in hexgrid_us.iterrows()]
)

# Create a single image from an ImageCollection: the mean of the
# 'temperature_2m' band over the filtered date range.
image_collection = ee.ImageCollection("ECMWF/ERA5_LAND/MONTHLY_AGGR")
image = (
    image_collection
    .filterDate('2015-01-01', '2023-12-31')
    .select('temperature_2m')
    .mean()
)

# Calculate zonal statistics on Earth Engine, fetch the result, and create a
# GeoDataFrame from it.
zonal_means = image.reduceRegions(
    collection=features, reducer=ee.Reducer.mean(), scale=11132, tileScale=1
)
zone_stats = gpd.GeoDataFrame.from_features(zonal_means.getInfo(), crs='epsg:4326')

**5.** Plot a choropleth map of the GeoDataFrame based on the statistics for each zone that was calculated in Earth Engine

In [None]:
fig, ax = plt.subplots(figsize=(30, 26))

# Reserve a slim axes to the right of the map for the legend.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Draw the hexagons, coloured by the 'mean' column, with the legend in `cax`.
zone_stats.plot(column='mean', ax=ax, cmap='OrRd', legend=True, cax=cax)

# Legend styling: inward-pointing major and minor ticks, plus a title.
cax.minorticks_on()
for tick_kind, tick_length in (('major', 18), ('minor', 12)):
    cax.tick_params(which=tick_kind, direction='in', length=tick_length, width=2, labelsize=24)
cax.set_title('Temperature (K)', fontsize=24)

# Hide the map's x-axis and y-axis.
ax.set_axis_off()