# This notebook demonstrates the workflow of creating a multidimensional cloud-free Landsat imagery cube from Microsoft Planetary Computer for the Amazon forest

The workflow includes the following steps:
1. Find all Landsat scenes from 1984-2023 with less than 10% cloud cover over the study area in the Amazon forest
2. Create a RasterCollection from all the scenes
3. Use a custom function to remove cloud pixels from all the Landsat images
4. Use the median function to compute the most representative value of each pixel from all overlapping images in each year
5. Convert the result to a yearly multidimensional raster

#### Estimated running time: 8 hrs (reduce the number of years to reduce running time)

### Import relevant modules and create a RasterCollection of Landsat images from the Planetary Computer archive

In [None]:
import arcpy
from arcpy import AIO

In [None]:
# Create an AIO object from the cloud storage connection (.acs) file for data access.
# NOTE(review): `a` is not referenced again in this notebook; presumably
# constructing the AIO object is what makes the connection available - confirm.
a = AIO(r'C:\AMPC_Resources\ACS_Files\esrims_pc_landsat-c2-l2.acs')

In [None]:
# Name each search constraint, then assemble the STAC query from them.

# Landsat Collection 2 Level-2 product
# (https://planetarycomputer.microsoft.com/dataset/landsat-c2-l2)
LANDSAT_COLLECTIONS = ["landsat-c2-l2"]

# Bounding box over the Amazon forest
AOI_BBOX = [-51.9370008, -6.4641491, -51.7794525, -6.2008215]

# Cloud cover requirement: eo:cloud_cover must be less than ("lt") 10
CLOUD_COVER_FILTER = {"eo:cloud_cover": {"lt": 10}}

# Time range to search
TIME_RANGE = "1984-01-01/2023-12-31"

# Max number of results to be returned
RESULT_LIMIT = 1000

query = dict(
    collections=LANDSAT_COLLECTIONS,
    bbox=AOI_BBOX,
    query=CLOUD_COVER_FILTER,
    datetime=TIME_RANGE,
    limit=RESULT_LIMIT,
)

**Note: certain files in the Microsoft Planetary Computer (MPC) collection may be corrupted, so some datasets may be skipped.**

In [None]:
# Search the Planetary Computer STAC API and collect the matching scenes
# in a RasterCollection object.
stac_endpoint = "https://planetarycomputer.microsoft.com/api/stac/v1"

# Field name in the RasterCollection -> STAC item property that fills it
stac_field_map = {
    "Name": "id",
    "Cloud Cover": "eo:cloud_cover",
    "StdTime": "datetime",
    "Platform": "platform",
    "Spatial Reference": "proj:epsg",
    "Extent": "bbox",
}

# Assets to load as bands. Note the bands imported through assetManagement
# will be renamed to Band_1, Band_2 ... etc (in this order, "qa_pixel" is the
# fifth asset).
band_assets = ["red", "green", "blue", "nir08", "qa_pixel"]

rc = arcpy.ia.RasterCollection.fromSTACAPI(
    stac_api=stac_endpoint,
    query=query,
    attribute_dict=stac_field_map,
    context={"assetManagement": band_assets},
)


In [None]:
# Visualize the RasterCollection: as the last expression in the cell,
# `rc` is rendered by the notebook.
rc

In [None]:
# Get number of scenes from each sensor
print(f"{'Total number of scenes:' :<25} {len(rc)}")

# Read the Platform field once and reuse it for every sensor, instead of
# querying the collection again for each printed line.
platform_values = rc.getFieldValues('Platform')

# Landsat missions whose scenes are counted, newest first
for sensor_number in (9, 8, 7, 5, 4):
    label = f"Landsat {sensor_number}:"
    scene_count = platform_values.count(f"landsat-{sensor_number}")
    print(f"{label:<25} {scene_count}")

### Define a function to remove cloud pixels and compute yearly cloud-free composites

In [None]:
# Per-item callback that masks a scene using its QA band
def remove_cloud(item):
    """Return a cloud-masked version of one RasterCollection item.

    Written to be passed to ``RasterCollection.map``.

    Args:
        item: Mapping describing a single collection item. The keys
            ``'Raster'``, ``'Name'`` and ``'StdTime'`` are read.

    Returns:
        dict: The processed raster under ``'raster'``, together with the
        item's ``'Name'`` and ``'StdTime'`` values carried over unchanged.
    """
    scene = item['Raster']

    # The QA band is renamed to 'Band_5' after import.
    qa = scene.getRasterBands(['Band_5'])

    # Extract QA bits 0-4 into the same output positions (fill value 0).
    # NOTE(review): in the Landsat Collection 2 QA_PIXEL band these bits are
    # documented as fill, dilated cloud, cirrus, cloud and cloud shadow -
    # confirm against the product guide.
    flag_bits = [0, 1, 2, 3, 4]
    flagged = arcpy.ia.TransposeBits(qa, flag_bits, list(flag_bits), 0, None)

    # Invert the flag raster so it marks the pixels to keep.
    # NOTE(review): presumably `~` acts as a Boolean NOT on the raster - confirm.
    clear_mask = ~flagged

    # Restrict the scene to the kept pixels.
    masked_scene = arcpy.ia.Clip(scene, aoi=clear_mask)

    return {
        'raster': masked_scene,
        "Name": item["Name"],
        "StdTime": item["StdTime"],
    }

In [None]:
# Apply the above function to remove cloud pixels from each image in the RasterCollection.
# Each mapped item carries the keys returned by remove_cloud (raster, Name, StdTime).
rc_cloud_free = rc.map(remove_cloud)

In [None]:
# Build one cloud-free median composite per calendar year.
year_medians = []  # composite rasters, in chronological order
years = []         # year of each composite, parallel to year_medians

for year in range(1984, 2024):
    # Keep only the scenes whose StdTime falls inside this year.
    # NOTE(review): the end bound is a bare date; confirm that scenes acquired
    # during Dec 31 are still included by filterByTime.
    rc_year = rc_cloud_free.filterByTime(
        f"{year}-01-01",
        f"{year}-12-31",
        time_field_name="StdTime",
        date_time_format='%Y-%m-%d',
    )

    # Nothing to composite when the filtered collection is empty/falsy.
    if not rc_year:
        continue

    # Median across the overlapping images gives the most representative
    # pixel value; NoData is ignored and the union of all extents is used.
    cloud_free_composite_median = rc_year.median(
        ignore_nodata=True,
        extent_type='UnionOf',
    )

    year_medians.append(cloud_free_composite_median)
    years.append(year)

In [None]:
# Create a RasterCollection of yearly median composites.
# `year_medians` and `years` are parallel lists, so each composite is paired
# with its year in the StdTime field.
# NOTE(review): 'variable' is given as a single string; presumably it is
# applied to every item - confirm.
rc_yearly_median = arcpy.ia.RasterCollection(rasters = year_medians, attribute_dict = {'variable': 'median', 'StdTime': years})

In [None]:
# Convert the RasterCollection to a multidimensional raster, taking the
# variable name from the 'variable' field and using 'StdTime' as the dimension.
yearly_median_mdim = rc_yearly_median.toMultidimensionalRaster(variable_field_name = 'variable', dimension_field_names = 'StdTime')

In [None]:
%%time
# Save the multidimensional raster to disk as a .crf file.
# NOTE(review): the output path is hard-coded; adjust it to a folder that
# exists on your machine before running.
yearly_median_mdim.save(r'c:\temp\yearly_median.crf')