# ARC - Crop Monitoring with GEE Sentinel-2 Data

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/profLewis/ARC/blob/main/notebooks/test_gee.ipynb)

This notebook demonstrates the ARC pipeline using Sentinel-2 L2A data from **Google Earth Engine (GEE)**.

## Prerequisites

1. A [Google Earth Engine](https://earthengine.google.com/) account
2. Run the authentication cell below and follow the prompts

## 1. Install ARC

In [None]:
!pip install -q https://github.com/profLewis/ARC/archive/refs/heads/main.zip

## 2. Authenticate with Google Earth Engine

In [None]:
import ee

# Endpoint passed to ee.Initialize for this session.
GEE_HIGH_VOLUME_URL = 'https://earthengine-highvolume.googleapis.com'

# Authenticate first (follow the prompts), then open the session.
# NOTE(review): some earthengine-api releases may also need a Cloud
# project passed to ee.Initialize — confirm for the target environment.
ee.Authenticate()
ee.Initialize(opt_url=GEE_HIGH_VOLUME_URL)
print('GEE authenticated and initialised.')

In [None]:
# Smoke test: request the description of a constant image from GEE.
# A failure here means the session from the previous cell is not usable.
print('Testing GEE connection...')
result = ee.Image(1).getInfo()
print(f'  GEE connection: OK (test image type: {result["type"]})')

## 3. Test archetype generation (no GEE needed)

This tests the forward model and ensemble generation locally.

In [None]:
import arc
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path

# Days of year on which the archetypes are simulated: every 5th day from 1.
doys = np.arange(1, 366, 5)

# Three per-date angle arrays, each constant over the whole year.
# NOTE(review): presumably (SZA, VZA, RAA) in degrees, matching the order
# printed for s2_angles later in this notebook — confirm against ARC.
angs = (
    np.full(len(doys), 30),
    np.full(len(doys), 10),
    np.full(len(doys), 120),
)

# Generate the archetype ensemble locally (no GEE access involved).
s2_refs, pheo_samples, bio_samples, orig_bios, soil_samples = arc.generate_arc_refs(
    doys=doys,
    start_of_season=150,
    growth_season_length=45,
    num_samples=10000,
    angs=angs,
    crop_type='maize'
)

print(f'Archetype spectra shape: {s2_refs.shape}')
print(f'Biophysical parameters shape: {orig_bios.shape}')

# Band indices used for NDVI. These match the NDVI time-series cell later in
# this notebook (indices 7 and 2); this cell previously used index 3 instead
# of 2, so the two "NDVI" plots were computed from different bands.
# NOTE(review): assumes the archetype spectra and the retrieved S2 data share
# the same band order — confirm against ARC's band list.
NIR_BAND = 7
RED_BAND = 2

# Per-sample maxima over axis 0 of the LAI (index 4) and NDVI arrays.
max_lai = np.nanmax(orig_bios[4], axis=0)
ndvi = (s2_refs[NIR_BAND] - s2_refs[RED_BAND]) / (s2_refs[NIR_BAND] + s2_refs[RED_BAND])
max_ndvi = np.nanmax(ndvi, axis=0)

plt.figure(figsize=(8, 6))
# LAI is divided by 100 before plotting against the m2/m2 axis label.
plt.plot(max_ndvi, max_lai / 100, 'o', ms=3, alpha=0.05)
plt.xlabel('Max NDVI')
plt.ylabel('Max LAI (m$^2$/m$^2$)')
plt.title('Archetype ensemble: NDVI vs LAI')
plt.grid(True, alpha=0.3)
plt.show()
print('Archetype generation: OK')

## 4. Test GEE Sentinel-2 data retrieval

Download S2 data for a South African wheat field from Google Earth Engine.

In [None]:
import eof

# Locate the test field geometry shipped inside the installed arc package.
arc_dir = os.path.dirname(os.path.realpath(arc.__file__))
geojson_path = f'{arc_dir}/test_data/SF_field.geojson'

# Folder handed to the downloader; created if it does not exist yet.
S2_data_folder = Path.home() / 'Downloads/SF_field_gee'
S2_data_folder.mkdir(parents=True, exist_ok=True)

# Request parameters gathered in one place before the call.
s2_request = dict(
    start_date='2022-07-15',
    end_date='2022-11-30',
    geojson_path=geojson_path,
    S2_data_folder=str(S2_data_folder),
    source='gee',
)

print('Retrieving Sentinel-2 data from GEE...')
(
    s2_refs,
    s2_uncs,
    s2_angles,
    doys,
    mask,
    geotransform,
    crs,
) = eof.get_s2_official_data(**s2_request)

# Summarise what came back; valid pixels are counted where mask is False.
print(f'\nResults:')
print(f'  s2_refs shape:   {s2_refs.shape}  (images, bands, H, W)')
print(f'  s2_angles shape: {s2_angles.shape}  [SZA, VZA, RAA]')
print(f'  doys:            {doys}')
print(f'  mask shape:      {mask.shape}')
print(f'  N images:        {s2_refs.shape[0]}')
print(f'  Valid pixels:    {(~mask).sum()} / {mask.size}')

In [None]:
# NDVI time series from the GEE data: band indices 7 and 2 play the roles
# of NIR and red in the normalised-difference formula.
nir = s2_refs[:, 7]
red = s2_refs[:, 2]
ndvi = (nir - red) / (nir + red)

# Average over the two trailing (spatial) axes, ignoring NaN pixels.
mean_ndvi = np.nanmean(ndvi, axis=(1, 2))

# Plot only the dates whose mean is finite.
valid = np.isfinite(mean_ndvi)

fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(doys[valid], mean_ndvi[valid], '--o', label='Mean NDVI (GEE)')
ax.set_xlabel('Day of year')
ax.set_ylabel('NDVI')
ax.set_title('GEE Sentinel-2: Mean NDVI Time Series')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## 5. Run the full ARC pipeline with GEE

In [None]:
# Run parameters for the full pipeline.
START_OF_SEASON = 225
CROP_TYPE = 'wheat'
NUM_SAMPLES = 100_000
GROWTH_SEASON_LENGTH = 45

# Same date window, field and download folder as the retrieval cell above;
# the results are also written to an .npz file inside that folder.
arc_field_kwargs = dict(
    s2_start_date='2022-07-15',
    s2_end_date='2022-11-30',
    geojson_path=geojson_path,
    start_of_season=START_OF_SEASON,
    crop_type=CROP_TYPE,
    output_file_path=f'{S2_data_folder}/SF_field_gee.npz',
    num_samples=NUM_SAMPLES,
    growth_season_length=GROWTH_SEASON_LENGTH,
    S2_data_folder=str(S2_data_folder),
    data_source='gee',
)

print('Running full ARC pipeline with GEE data source...')
(
    scale_data,
    post_bio_tensor,
    post_bio_unc_tensor,
    mask,
    doys,
) = arc.arc_field(**arc_field_kwargs)

print(f'\nPipeline complete!')
print(f'  post_bio_tensor shape: {post_bio_tensor.shape}')
print(f'  post_bio_unc_tensor shape: {post_bio_unc_tensor.shape}')

## 6. Plot LAI time series

In [None]:
# Thin the first axis of the posterior so roughly 100 curves are drawn.
stride = max(1, post_bio_tensor.shape[0] // 100)

# Index 4 is plotted as LAI; values are divided by 100 for the m2/m2 axis.
lai_curves = post_bio_tensor[::stride, 4].T / 100

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(doys, lai_curves, '-', lw=1.5, alpha=0.6)
ax.set_ylabel('LAI (m$^2$/m$^2$)')
ax.set_xlabel('Day of year')
ax.set_title('LAI time series (GEE data source)')
ax.grid(True, alpha=0.3)
plt.show()

## 7. Plot LAI maps

In [None]:
# LAI slice (index 4), transposed so the first axis indexes dates, and
# divided by 100 for display in m2/m2.
lai = post_bio_tensor[:, 4].T / 100

# Lay the maps out on a grid that is five panels wide.
n_dates = len(doys)
nrows = int(np.ceil(n_dates / 5))
fig, axs = plt.subplots(ncols=5, nrows=nrows, figsize=(20, 4 * nrows))
axs = axs.ravel()

for i, ax in enumerate(axs[:n_dates]):
    # Start from an all-NaN canvas and fill only the pixels where mask is False.
    lai_map = np.full(mask.shape, np.nan)
    lai_map[~mask] = lai[i]
    im = ax.imshow(lai_map, vmin=0, vmax=7)
    fig.colorbar(im, ax=ax, shrink=0.8, label='LAI (m$^2$/m$^2$)')
    ax.set_title(f'DOY: {doys[i]}')

# Hide any panels left over at the end of the grid.
for ax in axs[n_dates:]:
    ax.axis('off')

plt.suptitle('LAI Maps (GEE data source)', y=1.02, fontsize=14)
plt.tight_layout()
plt.show()

## 8. Validate output

In [None]:
# Validate the pipeline output. Every check is recorded in `checks` so the
# final verdict reflects all of them, not only the parameter count.
print('Validation checks:')

# Guard the shape lookups so a tensor of the wrong rank is reported as a
# failed check rather than raising IndexError.
ndim_ok = post_bio_tensor.ndim == 3
checks = {
    'post_bio_tensor ndim == 3': ndim_ok,
    '7 biophysical params': ndim_ok and post_bio_tensor.shape[1] == 7,
    'N dates matches doys': ndim_ok and post_bio_tensor.shape[2] == len(doys),
}
for label, passed in checks.items():
    print(f'  {label}: {passed}')

# The LAI range is only meaningful if the LAI slice (index 4) exists.
lai_range_ok = False
if ndim_ok and post_bio_tensor.shape[1] > 4:
    # LAI is divided by 100 to obtain m2/m2 before range-checking.
    lai_phys = post_bio_tensor[:, 4, :] / 100
    lai_min, lai_max = lai_phys.min(), lai_phys.max()
    print(f'  LAI range: [{lai_min:.2f}, {lai_max:.2f}] m2/m2')
    # A NaN min/max makes both comparisons False, so NaNs fail this check.
    lai_range_ok = bool(lai_min >= 0 and lai_max <= 10)
checks['LAI range valid (0-10)'] = lai_range_ok
print(f'  LAI range valid (0-10): {lai_range_ok}')

print('\nAll checks passed!' if all(checks.values()) else '\nSome checks failed.')